From dcf417821219b618a6ce2f17dd5b198fa5d0b369 Mon Sep 17 00:00:00 2001
From: roytam1 <roytam@gmail.com>
Date: Wed, 22 Nov 2023 17:12:24 +0800
Subject: [PATCH] import changes from `dev' branch of rmottola/Arctic-Fox: -
 Bug 1218706 - Make 'unicode-bidi: isolate' the default for elements with a
 dir attribute. r=dbaron,jfkthame (037a5aaf57) - Bug 743198 part 7 - Add
 :fullscreen pseudo class. r=heycam (05eb82ccc9) - missing bits of  Bug
 1242904 - Update Brotli t (f4f569d859) - Bug 1235298 - Annotate intentional
 switch fallthroughs to suppress -Wimplicit-fallthrough warnings in netwerk/.
 r=mcmanus (c4ae414769) - Bug 1244841 - Don't measure SharedArrayBuffer
 objects multiple times. r=lth. (e9b665dc82) - Bug 234031 - broken parsing of
 xlen in gzip rfc 1952 r=mcmanus (c905b50417) - Bug 1239605 - filter
 insertions into the global object. r=jandem (35520bc5c8) - Bug 1248681 - Warn
 if the result of putWrapper goes unused; r=sfink (8051888ef8) - Bug 1246293 -
 Fix a typo in DefineSelfHostedProperty; r=till (5e05a76b2a) - Bug 1243808 -
 Allow modules to be compiled off main thread r=shu (51e5adca0a) - Bug 1241767
 - avoid default-only switch to placate MSVC. r=bbouvier (392aa8eda0) - Bug
 1239710 - Use CountingSort for Uint8Array and Int8Array; r=mrrrgn
 (6f394ab442) - Bug 1246860 - Preserve holes when sorting arrays with a custom
 comparator. r=till (725b091e55) - Bug 1247283 - Improve self-hosted
 Array.sort performance; r=till (ab1f3ddb74) - Bug 1248717 - Don't initialize
 variables in a for head with var, then use them later. r=jorendorff
 (87a77f9623) - Bug 1242196 - Use RadixSort for typed arrays. r=mrrrgn
 (a280ea097d) - Bug 1246927 - Fix OOM handling in usages of
 GlobalObject::maybeGetIntrinsicValue. r=arai (f7f3761c45) - Bug 1248405 -
 Recover from JSObject::getGroup OOM in CanOptimizeForDenseStorage. r=jandem
 (68ea32c044) - Bug 1225905 - create a mozharness script that manages each
 beet mover task logic, NPOTB DONTBUILD r=rail (076f25536f) - Bug 1246074 -
 add partials template config for mozharness beetmover, DONTBUILD r=rail
 (2e42c78d92) - Bug 1246074 - add partials mozharness beetmover, custom tc
 artifact location, DONTBUILD NPOTB r=rail (c74fe0755a) - Bug 1210538 - Add
 antivirus checks to release promotion graph a=rail (56de774389) - Bug 1238610
 - Restore compartment debug mode flags if adding a debuggee global fails
 r=jimb (f8a9a1fa85) - Bug 1243851 - Treat enter as shift+enter if input is
 valid but incomplete; r=fitzgen, bgrins (f27c959bc0) - Bug 1238610 - Fix
 implicit constructor errors r=me (6f26f92763) - Bug 1242111 - Handle OOM in
 UpdateExecutionObservabilityOfScriptsInZone. (r=jimb) (91b125725f) - Bug
 1240503 - Skip the initial block scope when unwinding scopes due to an
 exception that's thrown in the prologue before the scope chain is properly
 initialized for a script that starts with a block scope. (r=jorendorff)
 (0247fc8848) - Bug 1242111 - Fix references to oomTest. (r=me) (a9dc13a648) -
 Bug 1240546 - Handle OOM in updateObservesAllExecutionOnDebuggees. (r=jimb)
 (5d7e3a49f4) - Bug 1245961 - Throw a TypeError if less than one argument is
 supplied to isCompilableUnit;r=fitzgen (f896abb042) - Bug 1240803 - Handle
 OOM in replaceFrameGuts. (r=jimb) (2d43384c72) - Bug 1248094 - Simplify
 PCLocationMap with GCHashMap; r=fitzgen (b28d983bbd) - we don't care much
 abut 68k sadly (51c50300c5) - Bug 1233786 - JSScript::initScriptCounts should
 report OOMs. r=bhackett (eb42f7b8c1) - Bug 1233178 - Move ScriptCounts
 allocation outside the HashMap. r=bhackett (c3fa6d487c) - Bug 1141579 -
 Synchronize access to warmUpCount; r=jandem (a5b72cdf94) - Bug 1203696:
 Improve comments about lazily-initialized member of js::LazyScript. r=shu
 (caa895612d) - Bug 1221992 - Fix test using GetMostRecentWindow from the
 child process. r=smaug (07affe8195) - Bug 1235636 - rewrite PCToLineNumber;
 r=fitzgen (9dc9ff013e) - Bug 1232100 - "Check charsWritten in non-debug
 builds.". r=jcoppeard (fc5a64e621) - last bit of  Bug 1197932 (86277af34e) -
 Bug 1067049 - Implement arguments[@@iterator]. r=evilpie (543e513269) - Bug
 1248930 - Use Int32Value in ArrayBufferObject::BYTE_LENGTH_SLOT. r=lth
 (71e3a9ee51) - Bug 1113685 - Report the right name when calling selfhosted
 functions on incompatible objects. r=till (51f68d4f8d) - some symbol cleanup
 (83fca10034) - Bug 1165011 - Remove Symbol_isRegExp. r=jorendorff
 (46a2d293cc) - Bug 1122900: Make libyuv compile with MSVC 2015, r=rjesup.
 (9e147c7ba7) - bug 1241453 - clean up GetAccessibleWrap() r=davidb
 (01e37c5012) - Bug 1243331 - Prevent G_DEFINE_TYPE_EXTENDED macro from
 producing a fatal warning, r=tbsaunde (8bf031c4b9) - Bug 1232527 - Call into
 WMF PDM to determine if WMF can decode instead of using
 GMPVideoDecoderTrialCreator. r=jwwang (7d2b1f16f1) - Bug 1229475 - Fix
 gen-sources for libopus 1.1.1. r=cpearce (1e5a768d94) - Bug 1229475 - Update
 libopus to 1.1.1 release. r=jmspeex (0b73488ab3) - Bug 1139087 - Add
 moz.build bugzilla metadata for codecs. r=kinetik,gps (3d906f8f5a) - Bug
 1229475 - libopus: Patch out asm flags for run_analysis. r=jmspeex
 (bfa15edac1) - Bug 1229475 - Fix unified build. r=cpearce (bbeda94cfc) - Bug
 1239078 - Update libopus to 1.1.2 release. r=kinetik (9990b00867) - Bug
 1239078 - Bump libopus update script for 1.1.2. r=kinetik (eecd46d3d3) - bug
 1230377 - part 1/2: ensure nsKeyObject releases NSS resources on shutdown
 r=jcj (9ceefecbea) - bug 1230377 - part 2/2: simplify nsIKeyObject and
 nsIKeyObjectFactory r=jcj (1297d168b7) - bug 1239609 - audit
 nsNSSShutDownObject destructors for correctness r=Cykesiopka,sworkman
 (c78404e52a) - Bug 1246263 - fix unified build pollution r=valentin
 (f8db2c45cf) - Bug 1214981 - Disable output stream buffering. r=keeler
 (d9e7a1b863) - bug 1240173 - improve nsIX509Cert.dbKey r=Cykesiopka
 (0c0fc8e8a3) - Bug 1238042 - Extract a helper function to check if a JSObject
 is a global with a particular about: URI. r=ehsan (d065854725) - Bug 1244118
 - Shutdown threadpool when xpcom-shutdown-threads happened. r=roc
 (e6ef2768b6) - Bug 1201685 - Limit the number of indexedDB open() calls in
 IndexedDBHelper r=gwagner (a4fc80fca2) - Bug 1244049 - Part 3: Replace the
 type of nsCSSSelector::mPseudoType. r=dbaron (c817ee6145) - Bug 1244049 -
 Part 4: Define CSSPseudoElementTypeBase. r=dbaron (94dab59375) - Bug 1246846
 (part 1) - Avoid nsTHashtable::RawRemoveEntry() in dom/.  r=bz. (5371e478da)
 - Bug 1246846 (part 2) - Avoid nsTHashtable::RawRemoveEntry() in
 nsPermissionManager. r=mconnor. (d7a1143ed1) - Bug 1246846 (part 3) - Avoid
 nsTHashtable::RawRemoveEntry() in gfxFontconfigUtils. r=jfkthame.
 (d23259ca8e) - Bug 1246846 (part 4) - Avoid nsTHashtable::RawRemoveEntry() in
 FramePropertyTable. r=roc. (7de416abfa) - Bug 1238404 - Use 'using' directive
 instead of having separate Dispatch impl in subclasses of nsIEventTarget.
 r=froydnj (43028ed3b3) - Bug 938699 - Remove FindElementWithViewId from
 nsIDOMWindowUtils.idl and nsDOMWindowUtils.cpp. r=kats (b49d2b5e6a) - missing
 bit of Bug 1210294 - Remove the release-mode IsCallerChrome assertions
 (a555243280)

---
 accessible/atk/AccessibleWrap.cpp             |  14 +-
 accessible/atk/moz.build                      |   7 +-
 dom/animation/PseudoElementHashEntry.h        |   2 +
 dom/base/FeedWriterEnabled.h                  |  33 +-
 dom/base/ImageEncoder.cpp                     |  38 +-
 dom/base/ImageEncoder.h                       |   1 +
 dom/base/IndexedDBHelper.jsm                  |  42 +-
 dom/base/ShadowRoot.cpp                       |   2 +-
 dom/base/WebSocket.cpp                        |   5 +-
 dom/base/nsContentUtils.cpp                   |  34 +
 dom/base/nsContentUtils.h                     |   8 +
 dom/base/nsDOMWindowUtils.cpp                 |  12 -
 dom/base/nsDocument.cpp                       |   2 +-
 ..._exception_options_from_jsimplemented.html |   4 +-
 ...promise_rejections_from_jsimplemented.html |   4 +-
 dom/interfaces/base/nsIDOMWindowUtils.idl     |   7 -
 dom/media/platforms/wmf/WMFDecoderModule.cpp  |  16 +-
 dom/media/platforms/wmf/WMFDecoderModule.h    |   9 +
 dom/wifi/WifiCertService.cpp                  |   6 +
 dom/xul/XULDocument.cpp                       |   2 +-
 extensions/cookie/nsPermissionManager.cpp     |   4 +-
 gfx/thebes/gfxFontconfigUtils.cpp             |   8 +-
 ipc/chromium/src/build/build_config.h         |   5 +-
 js/public/MemoryMetrics.h                     |   5 +-
 js/src/builtin/ModuleObject.cpp               |  68 +-
 js/src/builtin/ModuleObject.h                 |  15 +-
 js/src/builtin/Sorting.js                     | 209 ++++-
 js/src/builtin/TypedArray.js                  |  17 +-
 js/src/doc/Debugger/Debugger.md               |  10 +
 js/src/frontend/BytecodeCompiler.cpp          |  24 +-
 js/src/frontend/BytecodeCompiler.h            |   6 +-
 js/src/gc/Zone.h                              |  11 +-
 .../tests/collections/constructor-errors.js   |   5 +-
 .../tests/debug/Debugger-isCompilableUnit.js  |  58 ++
 .../Script-getOffsetsCoverage-bug1233178.js   |  13 +
 js/src/jit-test/tests/debug/bug-1238610.js    |  27 +
 js/src/jit-test/tests/debug/bug1240546.js     |  12 +
 js/src/jit-test/tests/debug/bug1240803.js     |  24 +
 js/src/jit-test/tests/debug/bug1242111.js     |  11 +
 js/src/jit-test/tests/for-of/non-iterable.js  |   3 -
 js/src/jit-test/tests/gc/bug-1240503.js       |   8 +
 js/src/jit-test/tests/ion/bug832058.js        |   3 +-
 js/src/jit-test/tests/modules/global-scope.js |  32 +
 .../tests/modules/off-thread-compile.js       |  16 +
 .../method-called-on-incompatible.js          |   9 +
 js/src/jsapi.cpp                              |  32 +-
 js/src/jsarray.cpp                            |   6 +-
 js/src/jscompartment.cpp                      |   5 +-
 js/src/jscompartment.h                        |  23 +-
 js/src/jsobj.cpp                              |   2 +-
 js/src/jsprf.cpp                              |   2 +-
 js/src/jsscript.cpp                           |  77 +-
 js/src/jsscript.h                             |  17 +-
 js/src/jsversion.h                            |   5 +-
 js/src/shell/js.cpp                           | 116 ++-
 .../tests/ecma_6/Array/iterator_edge_cases.js |   2 +-
 js/src/tests/ecma_6/Array/sort_holes.js       |  72 ++
 .../ecma_6/Function/arguments-iterator.js     | 167 ++++
 .../ecma_6/String/iterator_edge_cases.js      |   2 +-
 js/src/vm/ArgumentsObject.cpp                 |  43 +
 js/src/vm/ArgumentsObject.h                   |  20 +-
 js/src/vm/ArrayBufferObject.cpp               |  13 +-
 js/src/vm/ArrayBufferObject.h                 |   4 +-
 js/src/vm/CallNonGenericMethod.cpp            |   4 +
 js/src/vm/CommonPropertyNames.h               |  10 +-
 js/src/vm/Debugger.cpp                        | 169 +++-
 js/src/vm/Debugger.h                          |   2 +
 js/src/vm/GlobalObject.cpp                    |  42 +-
 js/src/vm/GlobalObject.h                      |  44 +-
 js/src/vm/HelperThreads.cpp                   | 257 ++++--
 js/src/vm/HelperThreads.h                     |  44 +-
 js/src/vm/NativeObject.cpp                    |   5 +
 js/src/vm/SavedStacks.cpp                     |  22 +-
 js/src/vm/SavedStacks.h                       |  35 +-
 js/src/vm/ScopeObject.cpp                     |  13 +
 js/src/vm/SelfHosting.cpp                     |  93 ++
 js/src/vm/SelfHosting.h                       |  10 +-
 js/src/vm/SharedArrayObject.cpp               |  16 +-
 js/src/vm/SharedArrayObject.h                 |   6 +-
 js/src/vm/Stack.cpp                           |   1 +
 js/xpconnect/src/XPCJSRuntime.cpp             |  24 +-
 layout/base/FramePropertyTable.cpp            |   8 +-
 layout/base/FramePropertyTable.h              |   3 +
 layout/inspector/inDOMUtils.cpp               |   5 +-
 layout/media/symbols.def.in                   |   1 +
 layout/reftests/bidi/613157-2-ref.html        |   2 +-
 layout/reftests/bidi/698706-1-ref.html        |   2 +-
 layout/reftests/bidi/83958-2c.html            |  33 -
 layout/reftests/bidi/reftest.list             |   3 +-
 layout/style/StyleRule.cpp                    |   6 +-
 layout/style/StyleRule.h                      |  12 +-
 layout/style/html.css                         | 113 +--
 layout/style/nsCSSPseudoClassList.h           |   1 +
 layout/style/nsCSSPseudoElements.cpp          |   9 +-
 layout/style/nsCSSPseudoElements.h            |   3 +-
 layout/style/nsCSSRuleProcessor.cpp           |  13 +-
 layout/style/nsStyleContext.cpp               |   3 +-
 layout/style/test/test_default_bidi_css.html  |  12 +-
 layout/style/ua.css                           |   4 +-
 .../0.1/ClearKeySessionManager.cpp            |  23 +-
 media/libogg/moz.build                        |   3 +
 media/libopus/README_MOZILLA                  |   2 +-
 media/libopus/celt/_kiss_fft_guts.h           |   7 +-
 media/libopus/celt/arch.h                     |  33 +-
 media/libopus/celt/arm/arm2gnu.pl             |  53 +-
 media/libopus/celt/arm/arm_celt_map.c         |  82 +-
 media/libopus/celt/arm/armcpu.c               |   6 +-
 media/libopus/celt/arm/celt_ne10_fft.c        | 174 ++++
 media/libopus/celt/arm/celt_ne10_mdct.c       | 258 ++++++
 media/libopus/celt/arm/celt_neon_intr.c       | 252 +++++
 media/libopus/celt/arm/celt_pitch_xcorr_arm.s |  24 +-
 media/libopus/celt/arm/fft_arm.h              |  72 ++
 media/libopus/celt/arm/fixed_armv4.h          |   4 +
 media/libopus/celt/arm/fixed_armv5e.h         |  35 +
 media/libopus/celt/arm/mdct_arm.h             |  60 ++
 media/libopus/celt/arm/pitch_arm.h            |  15 +-
 media/libopus/celt/bands.c                    | 235 ++---
 media/libopus/celt/bands.h                    |  24 +-
 media/libopus/celt/celt.c                     |  96 +-
 media/libopus/celt/celt.h                     |  23 +-
 media/libopus/celt/celt_decoder.c             | 363 ++++----
 media/libopus/celt/celt_encoder.c             | 386 ++++----
 media/libopus/celt/celt_lpc.c                 |  16 +-
 media/libopus/celt/celt_lpc.h                 |  19 +-
 media/libopus/celt/cpu_support.h              |  20 +-
 media/libopus/celt/cwrs.c                     |  40 +-
 media/libopus/celt/cwrs.h                     |   2 +-
 media/libopus/celt/entcode.c                  |  60 ++
 media/libopus/celt/entcode.h                  |  35 +
 media/libopus/celt/entdec.c                   |   2 +-
 media/libopus/celt/entenc.c                   |   4 +-
 media/libopus/celt/fixed_debug.h              |  11 +
 media/libopus/celt/fixed_generic.h            |  19 +-
 media/libopus/celt/float_cast.h               |   4 +-
 media/libopus/celt/kiss_fft.c                 | 497 ++++------
 media/libopus/celt/kiss_fft.h                 |  75 +-
 media/libopus/celt/mdct.c                     | 210 +++--
 media/libopus/celt/mdct.h                     |  56 +-
 media/libopus/celt/mips/celt_mipsr1.h         | 151 +++
 .../libopus/celt/mips/fixed_generic_mipsr1.h  | 126 +++
 media/libopus/celt/mips/kiss_fft_mipsr1.h     | 167 ++++
 media/libopus/celt/mips/mdct_mipsr1.h         | 288 ++++++
 media/libopus/celt/mips/pitch_mipsr1.h        | 161 ++++
 media/libopus/celt/mips/vq_mipsr1.h           | 125 +++
 media/libopus/celt/modes.c                    |   8 +-
 media/libopus/celt/modes.h                    |   8 -
 media/libopus/celt/os_support.h               |   6 +-
 media/libopus/celt/pitch.c                    |  77 +-
 media/libopus/celt/pitch.h                    |  67 +-
 media/libopus/celt/quant_bands.c              |   2 +-
 media/libopus/celt/rate.c                     |   9 +-
 media/libopus/celt/rate.h                     |   2 +-
 media/libopus/celt/stack_alloc.h              |   8 +-
 media/libopus/celt/static_modes_fixed.h       | 621 +++++++++----
 .../celt/static_modes_fixed_arm_ne10.h        | 388 ++++++++
 media/libopus/celt/static_modes_float.h       | 613 +++++++++----
 .../celt/static_modes_float_arm_ne10.h        | 404 +++++++++
 media/libopus/celt/vq.c                       |  55 +-
 media/libopus/celt/vq.h                       |   9 +-
 media/libopus/celt/x86/celt_lpc_sse.c         | 132 +++
 media/libopus/celt/x86/celt_lpc_sse.h         |  68 ++
 media/libopus/celt/x86/pitch_sse.c            | 185 ++++
 media/libopus/celt/x86/pitch_sse.h            | 260 +++---
 media/libopus/celt/x86/pitch_sse2.c           |  95 ++
 media/libopus/celt/x86/pitch_sse4_1.c         | 195 ++++
 media/libopus/celt/x86/x86_celt_map.c         | 155 ++++
 media/libopus/celt/x86/x86cpu.c               | 157 ++++
 media/libopus/celt/x86/x86cpu.h               |  93 ++
 media/libopus/gen-sources.py                  |   2 +-
 media/libopus/include/opus.h                  |   5 +-
 media/libopus/include/opus_defines.h          |  97 +-
 media/libopus/include/opus_multistream.h      |   6 +-
 media/libopus/moz.build                       |   4 +-
 media/libopus/nonunified.patch                |  38 +
 media/libopus/silk/A2NLSF.c                   |  19 +-
 media/libopus/silk/API.h                      |   3 +-
 media/libopus/silk/CNG.c                      |  21 +-
 media/libopus/silk/LPC_analysis_filter.c      |   6 +-
 media/libopus/silk/NLSF_del_dec_quant.c       |  40 +-
 media/libopus/silk/NSQ.c                      |  13 +-
 media/libopus/silk/NSQ_del_dec.c              |   9 +-
 media/libopus/silk/PLC.c                      |  66 +-
 media/libopus/silk/PLC.h                      |   3 +-
 media/libopus/silk/SigProc_FIX.h              |  29 +-
 media/libopus/silk/VAD.c                      |   9 +-
 media/libopus/silk/VQ_WMat_EC.c               |  10 +-
 media/libopus/silk/code_signs.c               |   4 +-
 media/libopus/silk/control_SNR.c              |   5 -
 media/libopus/silk/control_codec.c            |  10 +-
 media/libopus/silk/dec_API.c                  |  42 +-
 media/libopus/silk/decode_core.c              |   5 +-
 media/libopus/silk/decode_frame.c             |  25 +-
 media/libopus/silk/decode_pulses.c            |  10 +-
 media/libopus/silk/define.h                   |   2 +-
 media/libopus/silk/enc_API.c                  |  17 +-
 media/libopus/silk/encode_pulses.c            |   6 +-
 .../silk/fixed/LTP_analysis_filter_FIX.c      |  19 +-
 media/libopus/silk/fixed/burg_modified_FIX.c  |  36 +-
 media/libopus/silk/fixed/corrMatrix_FIX.c     |  10 +-
 media/libopus/silk/fixed/encode_frame_FIX.c   |  14 +-
 media/libopus/silk/fixed/find_LPC_FIX.c       |   2 +-
 media/libopus/silk/fixed/find_LTP_FIX.c       |   7 +-
 .../libopus/silk/fixed/find_pitch_lags_FIX.c  |   2 +-
 .../libopus/silk/fixed/find_pred_coefs_FIX.c  |  15 +-
 media/libopus/silk/fixed/main_FIX.h           |  23 +-
 .../mips/noise_shape_analysis_FIX_mipsr1.h    | 336 +++++++
 .../silk/fixed/mips/prefilter_FIX_mipsr1.h    | 184 ++++
 .../mips/warped_autocorrelation_FIX_mipsr1.h  | 165 ++++
 .../silk/fixed/noise_shape_analysis_FIX.c     |   6 +
 .../silk/fixed/pitch_analysis_core_FIX.c      |  22 +-
 media/libopus/silk/fixed/prefilter_FIX.c      |  16 +-
 .../libopus/silk/fixed/residual_energy_FIX.c  |   5 +-
 media/libopus/silk/fixed/structs_FIX.h        |   1 +
 media/libopus/silk/fixed/vector_ops_FIX.c     |  10 +-
 .../silk/fixed/warped_autocorrelation_FIX.c   |   7 +
 .../silk/fixed/x86/burg_modified_FIX_sse.c    | 375 ++++++++
 .../silk/fixed/x86/prefilter_FIX_sse.c        | 160 ++++
 .../silk/fixed/x86/vector_ops_FIX_sse.c       |  88 ++
 media/libopus/silk/float/encode_frame_FLP.c   |   2 +-
 media/libopus/silk/float/find_LPC_FLP.c       |   2 +-
 .../libopus/silk/float/find_pred_coefs_FLP.c  |   7 +-
 media/libopus/silk/float/main_FLP.h           |   3 +-
 .../silk/float/pitch_analysis_core_FLP.c      |   4 +-
 media/libopus/silk/float/structs_FLP.h        |   1 +
 media/libopus/silk/float/wrappers_FLP.c       |   9 +-
 media/libopus/silk/log2lin.c                  |   6 +-
 media/libopus/silk/macros.h                   |  40 +-
 media/libopus/silk/main.h                     |  55 +-
 media/libopus/silk/mips/NSQ_del_dec_mipsr1.h  | 405 +++++++++
 media/libopus/silk/mips/macros_mipsr1.h       |  92 ++
 media/libopus/silk/mips/sigproc_fix_mipsr1.h  |  65 ++
 media/libopus/silk/quant_LTP_gains.c          |  25 +-
 media/libopus/silk/resampler_rom.c            |  54 +-
 media/libopus/silk/shell_coder.c              |   8 +-
 media/libopus/silk/structs.h                  |   2 +-
 media/libopus/silk/sum_sqr_shift.c            |   1 +
 media/libopus/silk/tables.h                   |   6 +-
 media/libopus/silk/tuning_parameters.h        |   2 +-
 media/libopus/silk/x86/NSQ_del_dec_sse.c      | 857 ++++++++++++++++++
 media/libopus/silk/x86/NSQ_sse.c              | 720 +++++++++++++++
 media/libopus/silk/x86/SigProc_FIX_sse.h      |  94 ++
 media/libopus/silk/x86/VAD_sse.c              | 277 ++++++
 media/libopus/silk/x86/VQ_WMat_EC_sse.c       | 142 +++
 media/libopus/silk/x86/main_sse.h             | 276 ++++++
 media/libopus/silk/x86/x86_silk_map.c         | 174 ++++
 media/libopus/sources.mozbuild                |  44 +-
 media/libopus/src/analysis.c                  |  44 +-
 media/libopus/src/analysis.h                  |  17 +-
 media/libopus/src/mlp.c                       | 117 +--
 media/libopus/src/mlp.h                       |   8 +-
 media/libopus/src/mlp_data.c                  |   4 +
 media/libopus/src/opus.c                      |  21 +
 media/libopus/src/opus_decoder.c              |  91 +-
 media/libopus/src/opus_encoder.c              | 101 ++-
 media/libopus/src/opus_multistream_decoder.c  |   4 +-
 media/libopus/src/opus_multistream_encoder.c  | 102 ++-
 media/libopus/src/opus_private.h              |  19 +-
 media/libopus/src/repacketizer.c              |   5 +-
 media/libopus/update.sh                       |   3 +-
 media/libvpx/vp8/common/postproc.c            |   2 +-
 media/libyuv/source/compare_win.cc            |   8 +-
 media/libyuv/source/rotate.cc                 |   4 +-
 media/libyuv/source/row_win.cc                | 288 +++---
 media/libyuv/source/scale_win.cc              |  54 +-
 netwerk/base/BackgroundFileSaver.cpp          |   6 +
 netwerk/base/nsSocketTransportService2.h      |   5 +-
 netwerk/base/nsStreamTransportService.h       |   5 +-
 netwerk/base/nsURLHelper.cpp                  |   2 +-
 netwerk/cache/nsDiskCacheDevice.cpp           |  21 +-
 netwerk/cache2/CacheHashUtils.cpp             |  25 +-
 netwerk/cache2/CacheIndex.cpp                 |   6 +-
 netwerk/cache2/CacheStorageService.cpp        |   2 +-
 netwerk/cookie/nsCookieService.cpp            |   9 +-
 netwerk/protocol/http/Http2Stream.cpp         |   3 +-
 netwerk/protocol/http/SpdyStream31.cpp        |   2 +-
 netwerk/protocol/http/nsHttpTransaction.cpp   |   3 +-
 .../protocol/websocket/WebSocketChannel.cpp   |   1 +
 .../converters/mozTXTToHTMLConv.cpp           |   4 +-
 .../converters/nsHTTPCompressConv.cpp         |   8 +-
 release/docker/beet-mover/Dockerfile          |  19 +
 .../manager/ssl/nsCertOverrideService.cpp     |  71 +-
 security/manager/ssl/nsClientAuthRemember.cpp |  21 +-
 security/manager/ssl/nsCryptoHash.cpp         |   2 +-
 security/manager/ssl/nsIKeyModule.idl         |  34 +-
 security/manager/ssl/nsIX509Cert.idl          |   4 +-
 security/manager/ssl/nsKeyModule.cpp          | 181 ++--
 security/manager/ssl/nsKeyModule.h            |  36 +-
 security/manager/ssl/nsNSSCertificate.cpp     |  51 +-
 security/manager/ssl/nsNSSCertificate.h       |  11 -
 security/manager/ssl/nsNSSCertificateDB.cpp   |  91 +-
 .../ssl/nsNSSCertificateFakeTransport.cpp     |   2 +-
 security/manager/ssl/nsNSSShutDown.h          |  16 +
 .../manager/ssl/tests/unit/test_cert_dbKey.js | 152 ++++
 .../ssl/tests/unit/test_weak_crypto.js        |  43 +-
 security/manager/ssl/tests/unit/xpcshell.ini  |   1 +
 .../configs/beetmover/en_us.yml.tmpl          |  84 ++
 .../configs/beetmover/partials.yml.tmpl       |  16 +
 .../external_tools/extract_and_run_command.py | 205 +++++
 .../mozharness/scripts/release/beet_mover.py  | 343 +++++++
 xpcom/threads/LazyIdleThread.h                |   5 +-
 xpcom/threads/SharedThreadPool.h              |   6 +-
 xpcom/threads/nsIEventTarget.idl              |   7 +-
 xpcom/threads/nsIThread.idl                   |   2 +-
 xpcom/threads/nsIThreadInternal.idl           |   2 +-
 xpcom/threads/nsIThreadPool.idl               |   2 +-
 xpcom/threads/nsThread.h                      |   5 +-
 xpcom/threads/nsThreadPool.h                  |   5 +-
 307 files changed, 15145 insertions(+), 3072 deletions(-)
 create mode 100644 js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js
 create mode 100644 js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js
 create mode 100644 js/src/jit-test/tests/debug/bug-1238610.js
 create mode 100644 js/src/jit-test/tests/debug/bug1240546.js
 create mode 100644 js/src/jit-test/tests/debug/bug1240803.js
 create mode 100644 js/src/jit-test/tests/debug/bug1242111.js
 create mode 100644 js/src/jit-test/tests/gc/bug-1240503.js
 create mode 100644 js/src/jit-test/tests/modules/global-scope.js
 create mode 100644 js/src/jit-test/tests/modules/off-thread-compile.js
 create mode 100644 js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js
 create mode 100644 js/src/tests/ecma_6/Array/sort_holes.js
 create mode 100644 js/src/tests/ecma_6/Function/arguments-iterator.js
 delete mode 100644 layout/reftests/bidi/83958-2c.html
 create mode 100644 media/libopus/celt/arm/celt_ne10_fft.c
 create mode 100644 media/libopus/celt/arm/celt_ne10_mdct.c
 create mode 100644 media/libopus/celt/arm/celt_neon_intr.c
 create mode 100644 media/libopus/celt/arm/fft_arm.h
 create mode 100644 media/libopus/celt/arm/mdct_arm.h
 create mode 100644 media/libopus/celt/mips/celt_mipsr1.h
 create mode 100644 media/libopus/celt/mips/fixed_generic_mipsr1.h
 create mode 100644 media/libopus/celt/mips/kiss_fft_mipsr1.h
 create mode 100644 media/libopus/celt/mips/mdct_mipsr1.h
 create mode 100644 media/libopus/celt/mips/pitch_mipsr1.h
 create mode 100644 media/libopus/celt/mips/vq_mipsr1.h
 create mode 100644 media/libopus/celt/static_modes_fixed_arm_ne10.h
 create mode 100644 media/libopus/celt/static_modes_float_arm_ne10.h
 create mode 100644 media/libopus/celt/x86/celt_lpc_sse.c
 create mode 100644 media/libopus/celt/x86/celt_lpc_sse.h
 create mode 100644 media/libopus/celt/x86/pitch_sse.c
 create mode 100644 media/libopus/celt/x86/pitch_sse2.c
 create mode 100644 media/libopus/celt/x86/pitch_sse4_1.c
 create mode 100644 media/libopus/celt/x86/x86_celt_map.c
 create mode 100644 media/libopus/celt/x86/x86cpu.c
 create mode 100644 media/libopus/celt/x86/x86cpu.h
 create mode 100644 media/libopus/nonunified.patch
 create mode 100644 media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
 create mode 100644 media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h
 create mode 100644 media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
 create mode 100644 media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
 create mode 100644 media/libopus/silk/fixed/x86/prefilter_FIX_sse.c
 create mode 100644 media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c
 create mode 100644 media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
 create mode 100644 media/libopus/silk/mips/macros_mipsr1.h
 create mode 100644 media/libopus/silk/mips/sigproc_fix_mipsr1.h
 create mode 100644 media/libopus/silk/x86/NSQ_del_dec_sse.c
 create mode 100644 media/libopus/silk/x86/NSQ_sse.c
 create mode 100644 media/libopus/silk/x86/SigProc_FIX_sse.h
 create mode 100644 media/libopus/silk/x86/VAD_sse.c
 create mode 100644 media/libopus/silk/x86/VQ_WMat_EC_sse.c
 create mode 100644 media/libopus/silk/x86/main_sse.h
 create mode 100644 media/libopus/silk/x86/x86_silk_map.c
 create mode 100644 release/docker/beet-mover/Dockerfile
 create mode 100644 security/manager/ssl/tests/unit/test_cert_dbKey.js
 create mode 100644 testing/mozharness/configs/beetmover/en_us.yml.tmpl
 create mode 100644 testing/mozharness/configs/beetmover/partials.yml.tmpl
 create mode 100644 testing/mozharness/external_tools/extract_and_run_command.py
 create mode 100644 testing/mozharness/scripts/release/beet_mover.py

diff --git a/accessible/atk/AccessibleWrap.cpp b/accessible/atk/AccessibleWrap.cpp
index 88c83d19d8..dd25b25d7b 100644
--- a/accessible/atk/AccessibleWrap.cpp
+++ b/accessible/atk/AccessibleWrap.cpp
@@ -1059,13 +1059,13 @@ GetAccessibleWrap(AtkObject* aAtkObj)
   NS_ENSURE_TRUE(isMAIObject || MAI_IS_ATK_SOCKET(aAtkObj),
                  nullptr);
 
-  uintptr_t accWrapPtr = isMAIObject ?
-    MAI_ATK_OBJECT(aAtkObj)->accWrap.Bits() :
-    reinterpret_cast<uintptr_t>(MAI_ATK_SOCKET(aAtkObj)->accWrap);
-  if (accWrapPtr & IS_PROXY)
-    return nullptr;
-
-  AccessibleWrap* accWrap = reinterpret_cast<AccessibleWrap*>(accWrapPtr);
+  AccessibleWrap* accWrap = nullptr;
+  if (isMAIObject) {
+    Accessible* acc = MAI_ATK_OBJECT(aAtkObj)->accWrap.AsAccessible();
+    accWrap = static_cast<AccessibleWrap*>(acc);
+  } else {
+    accWrap = MAI_ATK_SOCKET(aAtkObj)->accWrap;
+  }
 
   // Check if the accessible was deconstructed.
   if (!accWrap)
diff --git a/accessible/atk/moz.build b/accessible/atk/moz.build
index 67a59e6536..dae3f86a60 100644
--- a/accessible/atk/moz.build
+++ b/accessible/atk/moz.build
@@ -52,6 +52,7 @@ if CONFIG['MOZ_ENABLE_DBUS']:
 
 include('/ipc/chromium/chromium-config.mozbuild')
 
-if CONFIG['CLANG_CXX']:
-    # Suppress clang warning about unused function from gobject's RTTI macros.
-    CXXFLAGS += ['-Wno-unused-function']
+if CONFIG['CLANG_CXX'] or CONFIG['GNU_CXX']:
+    # Used in G_DEFINE_TYPE_EXTENDED macro, probably fixed in newer glib /
+    # gobject headers. See bug 1243331 comment 3.
+    CXXFLAGS += ['-Wno-unused-local-typedefs']
diff --git a/dom/animation/PseudoElementHashEntry.h b/dom/animation/PseudoElementHashEntry.h
index c1f90e670c..b84f7c018d 100644
--- a/dom/animation/PseudoElementHashEntry.h
+++ b/dom/animation/PseudoElementHashEntry.h
@@ -49,6 +49,8 @@ public:
       return 0;
 
     // Convert the scoped enum into an integer while adding it to hash.
+    // Note: CSSPseudoElementTypeBase is uint8_t, so we convert it into
+    //       uint8_t directly to avoid including the header.
     return mozilla::HashGeneric(aKey->mElement,
                                 static_cast<uint8_t>(aKey->mPseudoType));
   }
diff --git a/dom/base/FeedWriterEnabled.h b/dom/base/FeedWriterEnabled.h
index 52725bde3f..1bf39eba98 100644
--- a/dom/base/FeedWriterEnabled.h
+++ b/dom/base/FeedWriterEnabled.h
@@ -5,43 +5,14 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "js/TypeDecls.h"
-#include "nsGlobalWindow.h"
-#include "nsIPrincipal.h"
-#include "nsIURI.h"
-#include "nsString.h"
-#include "xpcpublic.h"
+#include "nsContentUtils.h"
 
 namespace mozilla {
 
 struct FeedWriterEnabled {
   static bool IsEnabled(JSContext* cx, JSObject* aGlobal)
   {
-    // Make sure the global is a window
-    nsGlobalWindow* win = xpc::WindowGlobalOrNull(aGlobal);
-    if (!win) {
-      return false;
-    }
-
-    // Make sure that the principal is about:feeds.
-    nsCOMPtr<nsIPrincipal> principal = win->GetPrincipal();
-    NS_ENSURE_TRUE(principal, false);
-    nsCOMPtr<nsIURI> uri;
-    principal->GetURI(getter_AddRefs(uri));
-    if (!uri) {
-      return false;
-    }
-
-    // First check the scheme to avoid getting long specs in the common case.
-    bool isAbout = false;
-    uri->SchemeIs("about", &isAbout);
-    if (!isAbout) {
-      return false;
-    }
-
-    // Now check the spec itself
-    nsAutoCString spec;
-    uri->GetSpec(spec);
-    return spec.EqualsLiteral("about:feeds");
+    return nsContentUtils::IsSpecificAboutPage(aGlobal, "about:feeds");
   }
 };
 
diff --git a/dom/base/ImageEncoder.cpp b/dom/base/ImageEncoder.cpp
index bd8c5b18f1..5bbad65d68 100644
--- a/dom/base/ImageEncoder.cpp
+++ b/dom/base/ImageEncoder.cpp
@@ -5,7 +5,6 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ImageEncoder.h"
-#include "mozilla/ClearOnShutdown.h"
 #include "mozilla/dom/CanvasRenderingContext2D.h"
 #include "mozilla/gfx/2D.h"
 #include "mozilla/gfx/DataSurfaceHelpers.h"
@@ -496,6 +495,38 @@ ImageEncoder::GetImageEncoder(nsAString& aType)
   return encoder.forget();
 }
 
+class EncoderThreadPoolTerminator final : public nsIObserver
+{
+  public:
+    NS_DECL_ISUPPORTS
+
+    NS_IMETHODIMP Observe(nsISupports *, const char *topic, const char16_t *) override
+    {
+      NS_ASSERTION(!strcmp(topic, "xpcom-shutdown-threads"),
+                   "Unexpected topic");
+      if (ImageEncoder::sThreadPool) {
+        ImageEncoder::sThreadPool->Shutdown();
+        ImageEncoder::sThreadPool = nullptr;
+      }
+      return NS_OK;
+    }
+  private:
+    ~EncoderThreadPoolTerminator() {}
+};
+
+NS_IMPL_ISUPPORTS(EncoderThreadPoolTerminator, nsIObserver)
+
+static void
+RegisterEncoderThreadPoolTerminatorObserver()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+  NS_ASSERTION(os, "do_GetService failed");
+  os->AddObserver(new EncoderThreadPoolTerminator(),
+                  "xpcom-shutdown-threads",
+                  false);
+}
+
 /* static */
 nsresult
 ImageEncoder::EnsureThreadPool()
@@ -503,12 +534,13 @@ ImageEncoder::EnsureThreadPool()
   if (!sThreadPool) {
     nsCOMPtr<nsIThreadPool> threadPool = do_CreateInstance(NS_THREADPOOL_CONTRACTID);
     sThreadPool = threadPool;
+
     if (!NS_IsMainThread()) {
       NS_DispatchToMainThread(NS_NewRunnableFunction([]() -> void {
-        ClearOnShutdown(&sThreadPool);
+        RegisterEncoderThreadPoolTerminatorObserver();
       }));
     } else {
-      ClearOnShutdown(&sThreadPool);
+      RegisterEncoderThreadPoolTerminatorObserver();
     }
 
     const uint32_t kThreadLimit = 2;
diff --git a/dom/base/ImageEncoder.h b/dom/base/ImageEncoder.h
index de15902150..fd30a94d47 100644
--- a/dom/base/ImageEncoder.h
+++ b/dom/base/ImageEncoder.h
@@ -114,6 +114,7 @@ private:
   static StaticRefPtr<nsIThreadPool> sThreadPool;
 
   friend class EncodingRunnable;
+  friend class EncoderThreadPoolTerminator;
 };
 
 /**
diff --git a/dom/base/IndexedDBHelper.jsm b/dom/base/IndexedDBHelper.jsm
index a3024c009a..9cfc631d13 100644
--- a/dom/base/IndexedDBHelper.jsm
+++ b/dom/base/IndexedDBHelper.jsm
@@ -4,8 +4,8 @@
 
 "use strict";
 
-let DEBUG = 0;
-let debug;
+var DEBUG = 0;
+var debug;
 if (DEBUG) {
   debug = function (s) { dump("-*- IndexedDBHelper: " + s + "\n"); }
 } else {
@@ -24,13 +24,10 @@ Cu.importGlobalProperties(["indexedDB"]);
 XPCOMUtils.defineLazyModuleGetter(this, 'Services',
   'resource://gre/modules/Services.jsm');
 
-this.IndexedDBHelper = function IndexedDBHelper() {}
+this.IndexedDBHelper = function IndexedDBHelper() {
+}
 
 IndexedDBHelper.prototype = {
-
-  // Cache the database
-  _db: null,
-
   // Close the database
   close: function close() {
     if (this._db) {
@@ -48,8 +45,22 @@ IndexedDBHelper.prototype = {
    * @param failureCb
    *        Error callback to call when an error is encountered.
    */
-  open: function open(aSuccessCb, aFailureCb) {
+  open: function open(aCallback) {
+    if (aCallback && !this._waitForOpenCallbacks.has(aCallback)) {
+      this._waitForOpenCallbacks.add(aCallback);
+      if (this._waitForOpenCallbacks.size !== 1) {
+        return;
+      }
+    }
+
     let self = this;
+    let invokeCallbacks = err => {
+      for (let callback of self._waitForOpenCallbacks) {
+        callback(err);
+      }
+      self._waitForOpenCallbacks.clear();
+    };
+
     if (DEBUG) debug("Try to open database:" + self.dbName + " " + self.dbVersion);
     let req = indexedDB.open(this.dbName, this.dbVersion);
     req.onsuccess = function (event) {
@@ -58,7 +69,7 @@ IndexedDBHelper.prototype = {
       self._db.onversionchange = function(event) {
         if (DEBUG) debug("WARNING: DB modified from a different window.");
       }
-      aSuccessCb && aSuccessCb();
+      invokeCallbacks();
     };
 
     req.onupgradeneeded = function (aEvent) {
@@ -72,7 +83,7 @@ IndexedDBHelper.prototype = {
     };
     req.onerror = function (aEvent) {
       if (DEBUG) debug("Failed to open database: " + self.dbName);
-      aFailureCb && aFailureCb(aEvent.target.error.name);
+      invokeCallbacks(aEvent.target.error.name);
     };
     req.onblocked = function (aEvent) {
       if (DEBUG) debug("Opening database request is blocked.");
@@ -96,7 +107,13 @@ IndexedDBHelper.prototype = {
       }
       return;
     }
-    this.open(aSuccessCb, aFailureCb);
+    this.open(aError => {
+      if (aError) {
+        aFailureCb && aFailureCb(aError);
+      } else {
+        aSuccessCb && aSuccessCb();
+      }
+    });
   },
 
   /**
@@ -169,5 +186,8 @@ IndexedDBHelper.prototype = {
     this.dbName = aDBName;
     this.dbVersion = aDBVersion;
     this.dbStoreNames = aDBStoreNames;
+    // Cache the database.
+    this._db = null;
+    this._waitForOpenCallbacks = new Set();
   }
 }
diff --git a/dom/base/ShadowRoot.cpp b/dom/base/ShadowRoot.cpp
index 968602f6de..ed60e55ae2 100644
--- a/dom/base/ShadowRoot.cpp
+++ b/dom/base/ShadowRoot.cpp
@@ -219,7 +219,7 @@ ShadowRoot::RemoveFromIdTable(Element* aElement, nsIAtom* aId)
   if (entry) {
     entry->RemoveIdElement(aElement);
     if (entry->IsEmpty()) {
-      mIdentifierMap.RawRemoveEntry(entry);
+      mIdentifierMap.RemoveEntry(entry);
     }
   }
 }
diff --git a/dom/base/WebSocket.cpp b/dom/base/WebSocket.cpp
index 8b2260fb35..45268ff7bc 100644
--- a/dom/base/WebSocket.cpp
+++ b/dom/base/WebSocket.cpp
@@ -81,10 +81,7 @@ public:
   NS_DECL_NSIREQUEST
   NS_DECL_THREADSAFE_ISUPPORTS
   NS_DECL_NSIEVENTTARGET
-  // missing from NS_DECL_NSIEVENTTARGET because MSVC
-  nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-    return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-  }
+  using nsIEventTarget::Dispatch;
 
   explicit WebSocketImpl(WebSocket* aWebSocket)
   : mWebSocket(aWebSocket)
diff --git a/dom/base/nsContentUtils.cpp b/dom/base/nsContentUtils.cpp
index 5b70906ac5..e9b1154144 100644
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@@ -8920,3 +8920,37 @@ nsContentUtils::SerializeNodeToMarkup(nsINode* aRoot,
     }
   }
 }
+
+bool
+nsContentUtils::IsSpecificAboutPage(JSObject* aGlobal, const char* aUri)
+{
+  // aUri must start with about: or this isn't the right function to be using.
+  MOZ_ASSERT(strncmp(aUri, "about:", 6) == 0);
+
+  // Make sure the global is a window
+  nsGlobalWindow* win = xpc::WindowGlobalOrNull(aGlobal);
+  if (!win) {
+    return false;
+  }
+
+  // Make sure that the principal is about:feeds.
+  nsCOMPtr<nsIPrincipal> principal = win->GetPrincipal();
+  NS_ENSURE_TRUE(principal, false);
+  nsCOMPtr<nsIURI> uri;
+  principal->GetURI(getter_AddRefs(uri));
+  if (!uri) {
+    return false;
+  }
+
+  // First check the scheme to avoid getting long specs in the common case.
+  bool isAbout = false;
+  uri->SchemeIs("about", &isAbout);
+  if (!isAbout) {
+    return false;
+  }
+
+  // Now check the spec itself
+  nsAutoCString spec;
+  uri->GetSpec(spec);
+  return spec.EqualsASCII(aUri);
+}
diff --git a/dom/base/nsContentUtils.h b/dom/base/nsContentUtils.h
index 39b801a9e5..36618d264f 100644
--- a/dom/base/nsContentUtils.h
+++ b/dom/base/nsContentUtils.h
@@ -2600,6 +2600,14 @@ public:
                                     bool aDescendentsOnly,
                                     nsAString& aOut);
 
+  /*
+   * Returns true iff the provided JSObject is a global, and its URI matches
+   * the provided about: URI.
+   * @param aGlobal the JSObject whose URI to check, if it is a global.
+   * @param aUri the URI to match, e.g. "about:feeds"
+   */
+  static bool IsSpecificAboutPage(JSObject* aGlobal, const char* aUri);
+
 private:
   static bool InitializeEventTable();
 
diff --git a/dom/base/nsDOMWindowUtils.cpp b/dom/base/nsDOMWindowUtils.cpp
index 7408c31b3b..8f9a070b4c 100644
--- a/dom/base/nsDOMWindowUtils.cpp
+++ b/dom/base/nsDOMWindowUtils.cpp
@@ -1821,16 +1821,6 @@ nsDOMWindowUtils::GetFocusedInputType(char** aType)
   return NS_OK;
 }
 
-NS_IMETHODIMP
-nsDOMWindowUtils::FindElementWithViewId(nsViewID aID,
-                                        nsIDOMElement** aResult)
-{
-  MOZ_RELEASE_ASSERT(nsContentUtils::IsCallerChrome());
-
-  RefPtr<nsIContent> content = nsLayoutUtils::FindContentFor(aID);
-  return content ? CallQueryInterface(content, aResult) : NS_OK;
-}
-
 NS_IMETHODIMP
 nsDOMWindowUtils::GetViewId(nsIDOMElement* aElement, nsViewID* aResult)
 {
@@ -2623,8 +2613,6 @@ nsDOMWindowUtils::ComputeAnimationDistance(nsIDOMElement* aElement,
                                            const nsAString& aValue2,
                                            double* aResult)
 {
-  MOZ_RELEASE_ASSERT(nsContentUtils::LegacyIsCallerChromeOrNativeCode());
-
   nsresult rv;
   nsCOMPtr<nsIContent> content = do_QueryInterface(aElement, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
diff --git a/dom/base/nsDocument.cpp b/dom/base/nsDocument.cpp
index 745adb7fe5..0fe5204fe2 100644
--- a/dom/base/nsDocument.cpp
+++ b/dom/base/nsDocument.cpp
@@ -3052,7 +3052,7 @@ nsDocument::RemoveFromIdTable(Element *aElement, nsIAtom* aId)
     ++mExpandoAndGeneration.generation;
   }
   if (entry->IsEmpty()) {
-    mIdentifierMap.RawRemoveEntry(entry);
+    mIdentifierMap.RemoveEntry(entry);
   }
 }
 
diff --git a/dom/bindings/test/test_exception_options_from_jsimplemented.html b/dom/bindings/test/test_exception_options_from_jsimplemented.html
index 32bf6ca31c..8a98a8fb63 100644
--- a/dom/bindings/test/test_exception_options_from_jsimplemented.html
+++ b/dom/bindings/test/test_exception_options_from_jsimplemented.html
@@ -18,9 +18,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=1107592
     var file = location.href;
     var asyncFrame;
     /* Async parent frames from pushPrefEnv don't show up in e10s.  */
-    var isE10S = !SpecialPowers.Services.wm.getMostRecentWindow("navigator:browser");
+    var isE10S = !SpecialPowers.isMainProcess();
     if (!isE10S && SpecialPowers.getBoolPref("javascript.options.asyncstack")) {
-      asyncFrame = `Async*@${file}:153:1
+      asyncFrame = `Async*@${file}:153:3
 `;
     } else {
       asyncFrame = "";
diff --git a/dom/bindings/test/test_promise_rejections_from_jsimplemented.html b/dom/bindings/test/test_promise_rejections_from_jsimplemented.html
index 6c9b5f22b2..1879224742 100644
--- a/dom/bindings/test/test_promise_rejections_from_jsimplemented.html
+++ b/dom/bindings/test/test_promise_rejections_from_jsimplemented.html
@@ -38,10 +38,10 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=1107592
   function doTest() {
     var t = new TestInterfaceJS();
     /* Async parent frames from pushPrefEnv don't show up in e10s.  */
-    var isE10S = !SpecialPowers.Services.wm.getMostRecentWindow("navigator:browser");
+    var isE10S = !SpecialPowers.isMainProcess();
     var asyncStack = SpecialPowers.getBoolPref("javascript.options.asyncstack");
     var ourFile = location.href;
-    var parentFrame = (asyncStack && !isE10S) ? `Async*@${ourFile}:121:1
+    var parentFrame = (asyncStack && !isE10S) ? `Async*@${ourFile}:121:3
 ` : "";
 
     Promise.all([
diff --git a/dom/interfaces/base/nsIDOMWindowUtils.idl b/dom/interfaces/base/nsIDOMWindowUtils.idl
index 2a5858cab1..d86c748cbc 100644
--- a/dom/interfaces/base/nsIDOMWindowUtils.idl
+++ b/dom/interfaces/base/nsIDOMWindowUtils.idl
@@ -1480,13 +1480,6 @@ interface nsIDOMWindowUtils : nsISupports {
    */
   readonly attribute string focusedInputType;
 
-  /**
-   * Given a view ID from the compositor process, retrieve the element
-   * associated with a view. For scrollpanes for documents, the root
-   * element of the document is returned.
-   */
-  nsIDOMElement findElementWithViewId(in nsViewID aId);
-
   /**
    * Find the view ID for a given element. This is the reverse of
    * findElementWithViewId().
diff --git a/dom/media/platforms/wmf/WMFDecoderModule.cpp b/dom/media/platforms/wmf/WMFDecoderModule.cpp
index 875e206162..5914fde3ab 100644
--- a/dom/media/platforms/wmf/WMFDecoderModule.cpp
+++ b/dom/media/platforms/wmf/WMFDecoderModule.cpp
@@ -163,17 +163,29 @@ CanCreateWMFDecoder()
   return result.value();
 }
 
+/* static */ bool
+WMFDecoderModule::HasH264()
+{
+  return CanCreateWMFDecoder<CLSID_CMSH264DecoderMFT>();
+}
+
+/* static */ bool
+WMFDecoderModule::HasAAC()
+{
+  return CanCreateWMFDecoder<CLSID_CMSAACDecMFT>();
+}
+
 bool
 WMFDecoderModule::SupportsMimeType(const nsACString& aMimeType) const
 {
   if ((aMimeType.EqualsLiteral("audio/mp4a-latm") ||
        aMimeType.EqualsLiteral("audio/mp4")) &&
-      CanCreateWMFDecoder<CLSID_CMSAACDecMFT>()) {
+       WMFDecoderModule::HasAAC()) {
     return true;
   }
   if ((aMimeType.EqualsLiteral("video/avc") ||
        aMimeType.EqualsLiteral("video/mp4")) &&
-      CanCreateWMFDecoder<CLSID_CMSH264DecoderMFT>()) {
+       WMFDecoderModule::HasH264()) {
     return true;
   }
   if (aMimeType.EqualsLiteral("audio/mpeg") &&
diff --git a/dom/media/platforms/wmf/WMFDecoderModule.h b/dom/media/platforms/wmf/WMFDecoderModule.h
index a959e26091..67ec7ac6de 100644
--- a/dom/media/platforms/wmf/WMFDecoderModule.h
+++ b/dom/media/platforms/wmf/WMFDecoderModule.h
@@ -42,6 +42,15 @@ public:
   // Called from any thread, must call init first
   static int GetNumDecoderThreads();
   static bool LowLatencyMFTEnabled();
+
+  // Accessors that report whether we have the required MFTs available
+  // on the system to play various codecs. Windows Vista doesn't have the
+  // H.264/AAC decoders if the "Platform Update Supplement for Windows Vista"
+  // is not installed, and Window N and KN variants also require a "Media
+  // Feature Pack" to be installed. Windows XP doesn't have WMF.
+  static bool HasAAC();
+  static bool HasH264();
+
 private:
   bool mWMFInitialized;
 };
diff --git a/dom/wifi/WifiCertService.cpp b/dom/wifi/WifiCertService.cpp
index d4fb8e04b4..f1508d25b7 100644
--- a/dom/wifi/WifiCertService.cpp
+++ b/dom/wifi/WifiCertService.cpp
@@ -439,6 +439,12 @@ WifiCertService::WifiCertService()
 WifiCertService::~WifiCertService()
 {
   MOZ_ASSERT(!gWifiCertService);
+
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return;
+  }
+  shutdown(calledFromObject);
 }
 
 already_AddRefed<WifiCertService>
diff --git a/dom/xul/XULDocument.cpp b/dom/xul/XULDocument.cpp
index 14a3972d8c..3b5027d667 100644
--- a/dom/xul/XULDocument.cpp
+++ b/dom/xul/XULDocument.cpp
@@ -1831,7 +1831,7 @@ XULDocument::RemoveElementFromRefMap(Element* aElement)
         if (!entry)
             return;
         if (entry->RemoveElement(aElement)) {
-            mRefMap.RawRemoveEntry(entry);
+            mRefMap.RemoveEntry(entry);
         }
     }
 }
diff --git a/extensions/cookie/nsPermissionManager.cpp b/extensions/cookie/nsPermissionManager.cpp
index cd51955ebe..c686011390 100644
--- a/extensions/cookie/nsPermissionManager.cpp
+++ b/extensions/cookie/nsPermissionManager.cpp
@@ -794,7 +794,7 @@ nsPermissionManager::AddInternal(nsIPrincipal* aPrincipal,
   PermissionHashKey* entry = mPermissionTable.PutEntry(key);
   if (!entry) return NS_ERROR_FAILURE;
   if (!entry->GetKey()) {
-    mPermissionTable.RawRemoveEntry(entry);
+    mPermissionTable.RemoveEntry(entry);
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
@@ -929,7 +929,7 @@ nsPermissionManager::AddInternal(nsIPrincipal* aPrincipal,
 
       // If there are no more permissions stored for that entry, clear it.
       if (entry->GetPermissions().IsEmpty()) {
-        mPermissionTable.RawRemoveEntry(entry);
+        mPermissionTable.RemoveEntry(entry);
       }
 
       break;
diff --git a/gfx/thebes/gfxFontconfigUtils.cpp b/gfx/thebes/gfxFontconfigUtils.cpp
index 867f60747e..28f22db2bb 100644
--- a/gfx/thebes/gfxFontconfigUtils.cpp
+++ b/gfx/thebes/gfxFontconfigUtils.cpp
@@ -617,13 +617,13 @@ gfxFontconfigUtils::UpdateFontListInternal(bool aForce)
                     bool added = entry->AddFont(font);
 
                     if (!entry->mKey) {
-                        // The reference to the font pattern keeps the pointer to
-                        // string for the key valid.  If adding the font failed
-                        // then the entry must be removed.
+                        // The reference to the font pattern keeps the pointer
+                        // to string for the key valid.  If adding the font
+                        // failed then the entry must be removed.
                         if (added) {
                             entry->mKey = family;
                         } else {
-                            mFontsByFamily.RawRemoveEntry(entry);
+                            mFontsByFamily.RemoveEntry(entry);
                         }
                     }
                 }
diff --git a/ipc/chromium/src/build/build_config.h b/ipc/chromium/src/build/build_config.h
index 4208f727b5..7a4938e06c 100644
--- a/ipc/chromium/src/build/build_config.h
+++ b/ipc/chromium/src/build/build_config.h
@@ -81,10 +81,7 @@
 #elif defined(__ppc__) || defined(__powerpc__)
 #define ARCH_CPU_PPC 1
 #define ARCH_CPU_32_BITS 1
-#elif defined(__m68k__)
-#define ARCH_CPU_M68K 1
-#define ARCH_CPU_32_BITS 1
-#elif defined(__sparc__) && defined(__arch64__)
+#elif defined(__sparc64__)
 #define ARCH_CPU_SPARC 1
 #define ARCH_CPU_64_BITS 1
 #elif defined(__sparc__)
diff --git a/js/public/MemoryMetrics.h b/js/public/MemoryMetrics.h
index 5157dfa613..a552d5e524 100644
--- a/js/public/MemoryMetrics.h
+++ b/js/public/MemoryMetrics.h
@@ -166,10 +166,11 @@ struct ClassInfo
 #define FOR_EACH_SIZE(macro) \
     macro(Objects, GCHeapUsed, objectsGCHeap) \
     macro(Objects, MallocHeap, objectsMallocHeapSlots) \
-    macro(Objects, MallocHeap, objectsMallocHeapElementsNonAsmJS) \
+    macro(Objects, MallocHeap, objectsMallocHeapElementsNormal) \
     macro(Objects, MallocHeap, objectsMallocHeapElementsAsmJS) \
+    macro(Objects, NonHeap,    objectsNonHeapElementsNormal) \
     macro(Objects, NonHeap,    objectsNonHeapElementsAsmJS) \
-    macro(Objects, NonHeap,    objectsNonHeapElementsMapped) \
+    macro(Objects, NonHeap,    objectsNonHeapElementsShared) \
     macro(Objects, NonHeap,    objectsNonHeapCodeAsmJS) \
     macro(Objects, MallocHeap, objectsMallocHeapMisc) \
     \
diff --git a/js/src/builtin/ModuleObject.cpp b/js/src/builtin/ModuleObject.cpp
index f4998cde0e..52554e7b15 100644
--- a/js/src/builtin/ModuleObject.cpp
+++ b/js/src/builtin/ModuleObject.cpp
@@ -109,7 +109,7 @@ GlobalObject::initImportEntryProto(JSContext* cx, Handle<GlobalObject*> global)
 }
 
 /* static */ ImportEntryObject*
-ImportEntryObject::create(JSContext* cx,
+ImportEntryObject::create(ExclusiveContext* cx,
                           HandleAtom moduleRequest,
                           HandleAtom importName,
                           HandleAtom localName)
@@ -181,7 +181,7 @@ StringOrNullValue(JSString* maybeString)
 }
 
 /* static */ ExportEntryObject*
-ExportEntryObject::create(JSContext* cx,
+ExportEntryObject::create(ExclusiveContext* cx,
                           HandleAtom maybeExportName,
                           HandleAtom maybeModuleRequest,
                           HandleAtom maybeImportName,
@@ -691,6 +691,62 @@ ModuleObject::initImportExportData(HandleArrayObject requestedModules,
     initReservedSlot(StarExportEntriesSlot, ObjectValue(*starExportEntries));
 }
 
+static bool
+FreezeObjectProperty(JSContext* cx, HandleNativeObject obj, uint32_t slot)
+{
+    RootedObject property(cx, &obj->getSlot(slot).toObject());
+    return FreezeObject(cx, property);
+}
+
+/* static */ bool
+ModuleObject::FreezeArrayProperties(JSContext* cx, HandleModuleObject self)
+{
+    return FreezeObjectProperty(cx, self, RequestedModulesSlot) &&
+           FreezeObjectProperty(cx, self, ImportEntriesSlot) &&
+           FreezeObjectProperty(cx, self, LocalExportEntriesSlot) &&
+           FreezeObjectProperty(cx, self, IndirectExportEntriesSlot) &&
+           FreezeObjectProperty(cx, self, StarExportEntriesSlot);
+}
+
+static inline void
+AssertObjectPropertyFrozen(JSContext* cx, HandleNativeObject obj, uint32_t slot)
+{
+#ifdef DEBUG
+    bool frozen = false;
+    RootedObject property(cx, &obj->getSlot(slot).toObject());
+    MOZ_ALWAYS_TRUE(TestIntegrityLevel(cx, property, IntegrityLevel::Frozen, &frozen));
+    MOZ_ASSERT(frozen);
+#endif
+}
+
+/* static */ inline void
+ModuleObject::AssertArrayPropertiesFrozen(JSContext* cx, HandleModuleObject self)
+{
+    AssertObjectPropertyFrozen(cx, self, RequestedModulesSlot);
+    AssertObjectPropertyFrozen(cx, self, ImportEntriesSlot);
+    AssertObjectPropertyFrozen(cx, self, LocalExportEntriesSlot);
+    AssertObjectPropertyFrozen(cx, self, IndirectExportEntriesSlot);
+    AssertObjectPropertyFrozen(cx, self, StarExportEntriesSlot);
+}
+
+inline static void
+AssertModuleScopesMatch(ModuleObject* module)
+{
+    MOZ_ASSERT(IsStaticGlobalLexicalScope(module->enclosingStaticScope()));
+    MOZ_ASSERT(module->initialEnvironment().enclosingScope().as<ClonedBlockObject>().staticScope() ==
+               module->enclosingStaticScope());
+}
+
+void
+ModuleObject::fixScopesAfterCompartmentMerge(JSContext* cx)
+{
+    AssertModuleScopesMatch(this);
+    Rooted<ClonedBlockObject*> lexicalScope(cx, &script()->global().lexicalScope());
+    setReservedSlot(StaticScopeSlot, ObjectValue(lexicalScope->staticBlock()));
+    initialEnvironment().setEnclosingScope(lexicalScope);
+    AssertModuleScopesMatch(this);
+}
+
 bool
 ModuleObject::hasScript() const
 {
@@ -762,6 +818,8 @@ ModuleObject::noteFunctionDeclaration(ExclusiveContext* cx, HandleAtom name, Han
 /* static */ bool
 ModuleObject::instantiateFunctionDeclarations(JSContext* cx, HandleModuleObject self)
 {
+    AssertArrayPropertiesFrozen(cx, self);
+
     FunctionDeclarationVector* funDecls = self->functionDeclarations();
     if (!funDecls) {
         JS_ReportError(cx, "Module function declarations have already been instantiated");
@@ -798,6 +856,8 @@ ModuleObject::setEvaluated()
 /* static */ bool
 ModuleObject::evaluate(JSContext* cx, HandleModuleObject self, MutableHandleValue rval)
 {
+    AssertArrayPropertiesFrozen(cx, self);
+
     RootedScript script(cx, self->script());
     RootedModuleEnvironmentObject scope(cx, self->environment());
     if (!scope) {
@@ -887,7 +947,7 @@ js::InitModuleClasses(JSContext* cx, HandleObject obj)
 ///////////////////////////////////////////////////////////////////////////
 // ModuleBuilder
 
-ModuleBuilder::ModuleBuilder(JSContext* cx, HandleModuleObject module)
+ModuleBuilder::ModuleBuilder(ExclusiveContext* cx, HandleModuleObject module)
   : cx_(cx),
     module_(cx, module),
     requestedModules_(cx, AtomVector(cx)),
@@ -1170,8 +1230,6 @@ ArrayObject* ModuleBuilder::createArray(const GCVector<T>& vector)
     array->setDenseInitializedLength(length);
     for (uint32_t i = 0; i < length; i++)
         array->initDenseElement(i, MakeElementValue(vector[i]));
-    if (!JS_FreezeObject(cx_, array))
-        return nullptr;
 
     return array;
 }
diff --git a/js/src/builtin/ModuleObject.h b/js/src/builtin/ModuleObject.h
index 973e026a5b..cca0e7b34b 100644
--- a/js/src/builtin/ModuleObject.h
+++ b/js/src/builtin/ModuleObject.h
@@ -43,9 +43,9 @@ class ImportEntryObject : public NativeObject
     };
 
     static const Class class_;
-    static JSObject* initClass(JSContext* cx, HandleObject obj);
+    static JSObject* initClass(ExclusiveContext* cx, HandleObject obj);
     static bool isInstance(HandleValue value);
-    static ImportEntryObject* create(JSContext* cx,
+    static ImportEntryObject* create(ExclusiveContext* cx,
                                      HandleAtom moduleRequest,
                                      HandleAtom importName,
                                      HandleAtom localName);
@@ -70,9 +70,9 @@ class ExportEntryObject : public NativeObject
     };
 
     static const Class class_;
-    static JSObject* initClass(JSContext* cx, HandleObject obj);
+    static JSObject* initClass(ExclusiveContext* cx, HandleObject obj);
     static bool isInstance(HandleValue value);
-    static ExportEntryObject* create(JSContext* cx,
+    static ExportEntryObject* create(ExclusiveContext* cx,
                                      HandleAtom maybeExportName,
                                      HandleAtom maybeModuleRequest,
                                      HandleAtom maybeImportName,
@@ -232,6 +232,9 @@ class ModuleObject : public NativeObject
                               HandleArrayObject localExportEntries,
                               HandleArrayObject indiretExportEntries,
                               HandleArrayObject starExportEntries);
+    static bool FreezeArrayProperties(JSContext* cx, HandleModuleObject self);
+    static void AssertArrayPropertiesFrozen(JSContext* cx, HandleModuleObject self);
+    void fixScopesAfterCompartmentMerge(JSContext* cx);
 
     JSScript* script() const;
     JSObject* enclosingStaticScope() const;
@@ -273,7 +276,7 @@ class ModuleObject : public NativeObject
 class MOZ_STACK_CLASS ModuleBuilder
 {
   public:
-    explicit ModuleBuilder(JSContext* cx, HandleModuleObject module);
+    explicit ModuleBuilder(ExclusiveContext* cx, HandleModuleObject module);
 
     bool processImport(frontend::ParseNode* pn);
     bool processExport(frontend::ParseNode* pn);
@@ -296,7 +299,7 @@ class MOZ_STACK_CLASS ModuleBuilder
     using RootedImportEntryVector = JS::Rooted<ImportEntryVector>;
     using RootedExportEntryVector = JS::Rooted<ExportEntryVector>;
 
-    JSContext* cx_;
+    ExclusiveContext* cx_;
     RootedModuleObject module_;
     RootedAtomVector requestedModules_;
     RootedAtomVector importedBoundNames_;
diff --git a/js/src/builtin/Sorting.js b/js/src/builtin/Sorting.js
index ade8fe1954..c35a399f0a 100644
--- a/js/src/builtin/Sorting.js
+++ b/js/src/builtin/Sorting.js
@@ -6,12 +6,158 @@
 // consolidated here to avoid confusion and re-implementation of existing
 // algorithms.
 
+// For sorting values with limited range; uint8 and int8.
+function CountingSort(array, len, signed) {
+    var buffer = new List();
+    var min = 0;
+
+    // Map int8 values onto the uint8 range when storing in buffer.
+    if (signed)  {
+        min = -128;
+    }
+
+    for (var i = 0; i  < 256; i++) {
+        buffer[i] = 0;
+    }
+
+    // Populate the buffer
+    for (var i = 0; i < len; i++) {
+        var val = array[i];
+        buffer[val - min]++
+    }
+
+    // Traverse the buffer in order and write back elements to array
+    var val = 0;
+    for (var i = 0; i < len; i++) {
+        // Invariant: sum(buffer[val:]) == len-i
+        while (true) {
+            if (buffer[val] > 0) {
+                array[i] = val + min;
+                buffer[val]--;
+                break;
+            } else {
+                val++;
+            }
+        }
+    }
+    return array;
+}
+
+// Helper for RadixSort
+function ByteAtCol(x, pos) {
+    return  (x >> (pos * 8)) & 0xFF;
+}
+
+function SortByColumn(array, len, aux, col) {
+    const R = 256;
+    let counts = new List();
+
+    // |counts| is used to compute the starting index position for each key.
+    // Letting counts[0] always be 0, simplifies the transform step below.
+    // Example:
+    //
+    // Computing frequency counts for the input [1 2 1] gives:
+    //      0 1 2 3 ... (keys)
+    //      0 0 2 1     (frequencies)
+    //
+    // Transforming frequencies to indexes gives:
+    //      0 1 2 3 ... (keys)
+    //      0 0 2 3     (indexes)
+    for (let r = 0; r < R + 1; r++) {
+        counts[r] = 0;
+    }
+    // Compute frequency counts
+    for (let i = 0; i < len; i++) {
+        let val = array[i];
+        let b = ByteAtCol(val, col);
+        counts[b + 1]++;
+    }
+
+    // Transform counts to indices.
+    for (let r = 0; r < R; r++) {
+        counts[r+1] += counts[r];
+    }
+
+    // Distribute
+    for (let i = 0; i < len; i++) {
+        let val = array[i];
+        let b  = ByteAtCol(val, col);
+        aux[counts[b]++] = val;
+    }
+
+    // Copy back
+    for (let i = 0; i < len; i++) {
+        array[i] = aux[i];
+    }
+}
+
+// Sorts integers and float32. |signed| is true for int16 and int32, |floating|
+// is true for float32.
+function RadixSort(array, len, nbytes, signed, floating, comparefn) {
+
+    // Determined by performance testing.
+    if (len < 128) {
+        QuickSort(array, len, comparefn);
+        return array;
+    }
+
+    let aux = new List();
+    for (let i = 0; i < len; i++) {
+        aux[i] = 0;
+    }
+
+    let view = array;
+    let signMask = 1 << nbytes * 8 - 1;
+
+    // Preprocess
+    if (floating) {
+        view = new Int32Array(array.buffer);
+
+        // Flip sign bit for positive numbers; flip all bits for negative
+        // numbers
+        for (let i = 0; i < len; i++) {
+            if (view[i] & signMask) {
+                view[i] ^= 0xFFFFFFFF;
+            } else {
+                view[i] ^= signMask
+            }
+        }
+    } else if (signed) {
+        // Flip sign bit
+        for (let i = 0; i < len; i++) {
+            view[i] ^= signMask
+        }
+    }
+
+    // Sort
+    for (let col = 0; col < nbytes; col++) {
+        SortByColumn(view, len, aux, col);
+    }
+
+    // Restore original bit representation
+    if (floating) {
+        for (let i = 0; i < len; i++) {
+            if (view[i] & signMask) {
+                view[i] ^= signMask;
+            } else {
+                view[i] ^= 0xFFFFFFFF;
+            }
+        }
+    } else if (signed) {
+        for (let i = 0; i < len; i++) {
+            view[i] ^= signMask
+        }
+    }
+    return array;
+}
+
+
 // For sorting small arrays.
 function InsertionSort(array, from, to, comparefn) {
-    var item, swap;
-    for (var i = from + 1; i <= to; i++) {
+    let item, swap, i, j;
+    for (i = from + 1; i <= to; i++) {
         item = array[i];
-        for (var j = i - 1; j >= from; j--) {
+        for (j = i - 1; j >= from; j--) {
             swap = array[j];
             if (comparefn(swap, item) <= 0)
                 break;
@@ -28,28 +174,29 @@ function SwapArrayElements(array, i, j) {
 }
 
 // A helper function for MergeSort.
-function Merge(array, start, mid, end, lBuffer, rBuffer, comparefn) {
+function Merge(list, start, mid, end, lBuffer, rBuffer, comparefn) {
     var i, j, k;
 
     var sizeLeft = mid - start + 1;
     var sizeRight =  end - mid;
 
-    // Copy our virtual arrays into separate buffers.
+    // Copy our virtual lists into separate buffers.
     for (i = 0; i < sizeLeft; i++)
-        lBuffer[i] = array[start + i];
+        lBuffer[i] = list[start + i];
 
     for (j = 0; j < sizeRight; j++)
-        rBuffer[j] = array[mid + 1 + j];
+        rBuffer[j] = list[mid + 1 + j];
+
 
     i = 0;
     j = 0;
     k = start;
     while (i < sizeLeft && j < sizeRight) {
         if (comparefn(lBuffer[i], rBuffer[j]) <= 0) {
-            array[k] = lBuffer[i];
+            list[k] = lBuffer[i];
             i++;
         } else {
-            array[k] = rBuffer[j];
+            list[k] = rBuffer[j];
             j++;
         }
         k++;
@@ -57,46 +204,70 @@ function Merge(array, start, mid, end, lBuffer, rBuffer, comparefn) {
 
     // Empty out any remaining elements in the buffer.
     while (i < sizeLeft) {
-        array[k] = lBuffer[i];
+        list[k] =lBuffer[i];
         i++;
         k++;
     }
 
     while (j < sizeRight) {
-        array[k] = rBuffer[j];
+        list[k] =rBuffer[j];
         j++;
         k++;
     }
 }
 
+// Helper function for overwriting a sparse array with a
+// dense array, filling remaining slots with holes.
+function MoveHoles(sparse, sparseLen, dense, denseLen) {
+    for (var i = 0; i < denseLen; i++)
+        _DefineDataProperty(sparse, i, dense[i]);
+    for (var j = denseLen; j < sparseLen; j++)
+        delete sparse[j];
+}
+
 // Iterative, bottom up, mergesort.
 function MergeSort(array, len, comparefn) {
+    // To save effort we will do all of our work on a dense list,
+    // then create holes at the end.
+    var denseList = new List();
+    var denseLen = 0;
+
+    for (var i = 0; i < len; i++) {
+        if (i in array)
+            denseList[denseLen++] = array[i];
+    }
+
+    if (denseLen < 1)
+        return array;
+
     // Insertion sort for small arrays, where "small" is defined by performance
     // testing.
     if (len < 24) {
-       InsertionSort(array, 0, len - 1, comparefn);
-       return array;
+        InsertionSort(denseList, 0, denseLen - 1, comparefn);
+        MoveHoles(array, len, denseList, denseLen);
+        return array;
     }
 
     // We do all of our allocating up front
     var lBuffer = new List();
     var rBuffer = new List();
-    var mid, end, endOne, endTwo;
 
-    for (var windowSize = 1; windowSize < len; windowSize = 2*windowSize) {
-        for (var start = 0; start < len - 1; start += 2*windowSize) {
-            assert(windowSize < len, "The window size is larger than the array length!");
+    var mid, end, endOne, endTwo;
+    for (var windowSize = 1; windowSize < denseLen; windowSize = 2 * windowSize) {
+        for (var start = 0; start < denseLen - 1; start += 2 * windowSize) {
+            assert(windowSize < denseLen, "The window size is larger than the array denseLength!");
             // The midpoint between the two subarrays.
             mid = start + windowSize - 1;
             // To keep from going over the edge.
             end = start + 2 * windowSize - 1;
-            end = end < len - 1 ? end : len - 1;
+            end = end < denseLen - 1 ? end : denseLen - 1;
             // Skip lopsided runs to avoid doing useless work
             if (mid > end)
                 continue;
-            Merge(array, start, mid, end, lBuffer, rBuffer, comparefn);
+            Merge(denseList, start, mid, end, lBuffer, rBuffer, comparefn);
         }
     }
+    MoveHoles(array, len, denseList, denseLen);
     return array;
 }
 
diff --git a/js/src/builtin/TypedArray.js b/js/src/builtin/TypedArray.js
index 3966b1ad40..143aeb5b21 100644
--- a/js/src/builtin/TypedArray.js
+++ b/js/src/builtin/TypedArray.js
@@ -988,7 +988,22 @@ function TypedArraySort(comparefn) {
 
     if (comparefn === undefined) {
         comparefn = TypedArrayCompare;
-    } else {
+        // CountingSort doesn't invoke the comparefn
+        if (IsUint8TypedArray(obj)) {
+            return CountingSort(obj, len, false /* signed */);
+        } else if (IsInt8TypedArray(obj)) {
+            return CountingSort(obj, len, true /* signed */);
+        } else if (IsUint16TypedArray(obj)) {
+            return RadixSort(obj, len, 2 /* nbytes */, false /* signed */, false /* floating */, comparefn);
+        } else if (IsInt16TypedArray(obj)) {
+            return RadixSort(obj, len, 2 /* nbytes */, true /* signed */, false /* floating */, comparefn);
+        } else if (IsUint32TypedArray(obj)) {
+            return RadixSort(obj, len, 4 /* nbytes */, false /* signed */, false /* floating */, comparefn);
+        } else if (IsInt32TypedArray(obj)) {
+            return RadixSort(obj, len, 4 /* nbytes */, true /* signed */, false /* floating */, comparefn);
+        } else if (IsFloat32TypedArray(obj)) {
+            return RadixSort(obj, len, 4 /* nbytes */, true /* signed */, true /* floating */, comparefn);
+        }
         // To satisfy step 2 from TypedArray SortCompare described in 22.2.3.26
         // the user supplied comparefn is wrapped.
         var wrappedCompareFn = comparefn;
diff --git a/js/src/doc/Debugger/Debugger.md b/js/src/doc/Debugger/Debugger.md
index 8048fcd8fd..df275002fc 100644
--- a/js/src/doc/Debugger/Debugger.md
+++ b/js/src/doc/Debugger/Debugger.md
@@ -536,3 +536,13 @@ other kinds of objects.
     `TypeError`. Determine which global is designated by <i>global</i>
     using the same rules as [`Debugger.prototype.addDebuggee`][add].
 
+## Static methods of the Debugger Object
+
+The functions described below are not called with a `this` value.
+
+<code id="isCompilableUnit">isCompilableUnit(<i>source</i>)</code>
+:   Given a string of source code, designated by <i>source</i>, return false if
+    the string might become a valid JavaScript statement with the addition of
+    more lines. Otherwise return true. The intent is to support interactive
+    compilation - accumulate lines in a buffer until isCompilableUnit is true,
+    then pass it to the compiler.
diff --git a/js/src/frontend/BytecodeCompiler.cpp b/js/src/frontend/BytecodeCompiler.cpp
index 3ea9abe339..eade3ffa7c 100644
--- a/js/src/frontend/BytecodeCompiler.cpp
+++ b/js/src/frontend/BytecodeCompiler.cpp
@@ -576,7 +576,7 @@ ModuleObject* BytecodeCompiler::compileModule()
 
     module->init(script);
 
-    ModuleBuilder builder(cx->asJSContext(), module);
+    ModuleBuilder builder(cx, module);
     ParseNode* pn = parser->standaloneModule(module, builder);
     if (!pn)
         return nullptr;
@@ -759,20 +759,32 @@ frontend::CompileScript(ExclusiveContext* cx, LifoAlloc* alloc, HandleObject sco
 }
 
 ModuleObject*
-frontend::CompileModule(JSContext* cx, HandleObject obj,
-                        const ReadOnlyCompileOptions& optionsInput,
-                        SourceBufferHolder& srcBuf)
+frontend::CompileModule(ExclusiveContext* cx, const ReadOnlyCompileOptions& optionsInput,
+                        SourceBufferHolder& srcBuf, LifoAlloc* alloc)
 {
     MOZ_ASSERT(srcBuf.get());
+    MOZ_ASSERT(cx->isJSContext() == (alloc == nullptr));
+
+    if (!alloc)
+        alloc = &cx->asJSContext()->tempLifoAlloc();
 
     CompileOptions options(cx, optionsInput);
     options.maybeMakeStrictMode(true); // ES6 10.2.1 Module code is always strict mode code.
     options.setIsRunOnce(true);
 
     Rooted<StaticScope*> staticScope(cx, &cx->global()->lexicalScope().staticBlock());
-    BytecodeCompiler compiler(cx, &cx->tempLifoAlloc(), options, srcBuf, staticScope,
+    BytecodeCompiler compiler(cx, alloc, options, srcBuf, staticScope,
                               TraceLogger_ParserCompileModule);
-    return compiler.compileModule();
+    RootedModuleObject module(cx, compiler.compileModule());
+    if (!module)
+        return nullptr;
+
+    // This happens in GlobalHelperThreadState::finishModuleParseTask() when a
+    // module is compiled off main thread.
+    if (cx->isJSContext() && !ModuleObject::FreezeArrayProperties(cx->asJSContext(), module))
+        return nullptr;
+
+    return module;
 }
 
 bool
diff --git a/js/src/frontend/BytecodeCompiler.h b/js/src/frontend/BytecodeCompiler.h
index 258149477f..dac0ce0df6 100644
--- a/js/src/frontend/BytecodeCompiler.h
+++ b/js/src/frontend/BytecodeCompiler.h
@@ -32,9 +32,9 @@ CompileScript(ExclusiveContext* cx, LifoAlloc* alloc,
               SourceCompressionTask* extraSct = nullptr,
               ScriptSourceObject** sourceObjectOut = nullptr);
 
-ModuleObject *
-CompileModule(JSContext *cx, HandleObject obj, const ReadOnlyCompileOptions &options,
-              SourceBufferHolder &srcBuf);
+ModuleObject*
+CompileModule(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
+              SourceBufferHolder &srcBuf, LifoAlloc* alloc = nullptr);
 
 bool
 CompileLazyFunction(JSContext* cx, Handle<LazyScript*> lazy, const char16_t* chars, size_t length);
diff --git a/js/src/gc/Zone.h b/js/src/gc/Zone.h
index 8b7b1873cd..efeda7604a 100644
--- a/js/src/gc/Zone.h
+++ b/js/src/gc/Zone.h
@@ -128,7 +128,7 @@ struct Zone : public JS::shadow::Zone,
 {
     explicit Zone(JSRuntime* rt);
     ~Zone();
-    bool init(bool isSystem);
+    MOZ_WARN_UNUSED_RESULT bool init(bool isSystem);
 
     void findOutgoingEdges(js::gc::ComponentFinder<JS::Zone>& finder);
 
@@ -152,7 +152,8 @@ struct Zone : public JS::shadow::Zone,
     bool isTooMuchMalloc() const { return gcMallocBytes <= 0; }
     void onTooMuchMalloc();
 
-    void* onOutOfMemory(js::AllocFunction allocFunc, size_t nbytes, void* reallocPtr = nullptr) {
+    MOZ_WARN_UNUSED_RESULT void* onOutOfMemory(js::AllocFunction allocFunc, size_t nbytes,
+                                               void* reallocPtr = nullptr) {
         if (!js::CurrentThreadCanAccessRuntime(runtime_))
             return nullptr;
         return runtimeFromMainThread()->onOutOfMemory(allocFunc, nbytes, reallocPtr);
@@ -360,7 +361,7 @@ struct Zone : public JS::shadow::Zone,
     mozilla::DebugOnly<unsigned> gcLastZoneGroupIndex;
 
     // Creates a HashNumber based on getUniqueId. Returns false on OOM.
-    bool getHashCode(js::gc::Cell* cell, js::HashNumber* hashp) {
+    MOZ_WARN_UNUSED_RESULT bool getHashCode(js::gc::Cell* cell, js::HashNumber* hashp) {
         uint64_t uid;
         if (!getUniqueId(cell, &uid))
             return false;
@@ -370,7 +371,7 @@ struct Zone : public JS::shadow::Zone,
 
     // Puts an existing UID in |uidp|, or creates a new UID for this Cell and
     // puts that into |uidp|. Returns false on OOM.
-    bool getUniqueId(js::gc::Cell* cell, uint64_t* uidp) {
+    MOZ_WARN_UNUSED_RESULT bool getUniqueId(js::gc::Cell* cell, uint64_t* uidp) {
         MOZ_ASSERT(uidp);
         MOZ_ASSERT(js::CurrentThreadCanAccessZone(this));
 
@@ -396,7 +397,7 @@ struct Zone : public JS::shadow::Zone,
     }
 
     // Return true if this cell has a UID associated with it.
-    bool hasUniqueId(js::gc::Cell* cell) {
+    MOZ_WARN_UNUSED_RESULT bool hasUniqueId(js::gc::Cell* cell) {
         MOZ_ASSERT(js::CurrentThreadCanAccessZone(this));
         return uniqueIds_.has(cell);
     }
diff --git a/js/src/jit-test/tests/collections/constructor-errors.js b/js/src/jit-test/tests/collections/constructor-errors.js
index d5cbab9e79..6a145180f1 100644
--- a/js/src/jit-test/tests/collections/constructor-errors.js
+++ b/js/src/jit-test/tests/collections/constructor-errors.js
@@ -2,12 +2,9 @@
 
 load(libdir + "asserts.js");
 
-function argsobj() { return arguments; }
-
 var misc = [
     {}, {x: 1}, Math, isNaN,
     Object.create(null),
-    argsobj(0, 1, 2),
     true, 0, 3.1416,
     new Boolean(true), new Number(0),
     {iterator: function () { return undefined; }},
@@ -19,4 +16,4 @@ var misc = [
 for (var v of misc) {
     assertThrowsInstanceOf(function () { new Set(v); }, TypeError);
     assertThrowsInstanceOf(function () { new Map(v); }, TypeError);
-}
\ No newline at end of file
+}
diff --git a/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js b/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js
new file mode 100644
index 0000000000..9653805c48
--- /dev/null
+++ b/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js
@@ -0,0 +1,58 @@
+load(libdir + "asserts.js");
+
+const bad_types = [
+    2112,
+    {geddy: "lee"},
+    () => 1,
+    [],
+    Array
+]
+
+// We only accept strings around here!
+for (var badType of bad_types) {
+    assertThrowsInstanceOf(() => {
+        Debugger.isCompilableUnit(badType);
+    }, TypeError);
+}
+
+const compilable_units = [
+   "wubba-lubba-dub-dub",
+   "'Get Schwifty!'",
+   "1 + 2",
+   "function f(x) {}",
+   "function x(f,) {", // statements with bad syntax are always compilable
+   "let x = 100",
+   ";;;;;;;;",
+   "",
+   " ",
+   "\n",
+   "let x",
+]
+
+const non_compilable_units = [
+    "function f(x) {",
+    "(...d) =>",
+    "{geddy:",
+    "{",
+    "[1, 2",
+    "[",
+    "1 +",
+    "let x =",
+    "3 ==",
+]
+
+for (var code of compilable_units) {
+    assertEq(Debugger.isCompilableUnit(code), true);
+}
+
+for (var code of non_compilable_units) {
+    assertEq(Debugger.isCompilableUnit(code), false);
+}
+
+// Supplying no arguments should throw a type error
+assertThrowsInstanceOf(() => {
+    Debugger.isCompilableUnit();
+}, TypeError);
+
+// Supplying extra arguments should be fine
+assertEq(Debugger.isCompilableUnit("", 1, 2, 3, 4, {}, []), true);
diff --git a/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js b/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js
new file mode 100644
index 0000000000..633c8176b7
--- /dev/null
+++ b/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js
@@ -0,0 +1,13 @@
+gczeal(2);
+g = newGlobal();
+dbg = Debugger(g);
+function loop() {
+  for (var i = 0; i < 10; i++)
+    debugger;
+}
+g.eval(loop.toSource());
+dbg.onDebuggerStatement = function(f) {
+  f.script.getOffsetsCoverage();
+}
+dbg.collectCoverageInfo = true;
+g.eval("loop")();
diff --git a/js/src/jit-test/tests/debug/bug-1238610.js b/js/src/jit-test/tests/debug/bug-1238610.js
new file mode 100644
index 0000000000..369d7550d0
--- /dev/null
+++ b/js/src/jit-test/tests/debug/bug-1238610.js
@@ -0,0 +1,27 @@
+// |jit-test| allow-oom
+
+if (!('oomAfterAllocations' in this))
+    quit();
+
+if (helperThreadCount() === 0)
+    quit(0);
+
+lfcode = new Array();
+dbg = Debugger();
+dbg.onEnterFrame = function() {};
+g = newGlobal();
+lfcode.push(`
+  oomAfterAllocations(100);
+  new Number();
+  dbg.addDebuggee(g);
+`)
+file = lfcode.shift();
+loadFile(file);
+function loadFile(lfVarx) {
+  lfGlobal = newGlobal()
+  for (lfLocal in this)
+      if (!(lfLocal in lfGlobal))
+          lfGlobal[lfLocal] = this[lfLocal]
+  offThreadCompileScript(lfVarx)
+  lfGlobal.runOffThreadScript()
+}
diff --git a/js/src/jit-test/tests/debug/bug1240546.js b/js/src/jit-test/tests/debug/bug1240546.js
new file mode 100644
index 0000000000..58efe086e2
--- /dev/null
+++ b/js/src/jit-test/tests/debug/bug1240546.js
@@ -0,0 +1,12 @@
+// |jit-test| allow-oom
+
+if (!('oomAfterAllocations' in this))
+  quit();
+
+var g = newGlobal();
+g.debuggeeGlobal = this;
+g.eval("(" + function() {
+    oomAfterAllocations(100);
+    var dbg = Debugger(debuggeeGlobal);
+    dbg.onEnterFrame = function(frame) {}
+} + ")()");
diff --git a/js/src/jit-test/tests/debug/bug1240803.js b/js/src/jit-test/tests/debug/bug1240803.js
new file mode 100644
index 0000000000..84f6d17e59
--- /dev/null
+++ b/js/src/jit-test/tests/debug/bug1240803.js
@@ -0,0 +1,24 @@
+// |jit-test| allow-oom
+
+
+if (!('oomAfterAllocations' in this))
+  quit();
+
+(function() {
+    g = newGlobal()
+    dbg = new Debugger
+    g.toggle = function(d) {
+        if (d) {
+            dbg.addDebuggee(g);
+            dbg.getNewestFrame();
+            oomAfterAllocations(2);
+            setBreakpoint;
+        }
+    }
+    g.eval("" + function f(d) toggle(d))
+    g.eval("(" + function() {
+        f(false);
+        f(true);
+    } + ")()")
+})();
+
diff --git a/js/src/jit-test/tests/debug/bug1242111.js b/js/src/jit-test/tests/debug/bug1242111.js
new file mode 100644
index 0000000000..4b365ad5f5
--- /dev/null
+++ b/js/src/jit-test/tests/debug/bug1242111.js
@@ -0,0 +1,11 @@
+// |jit-test| allow-oom
+
+if (!('oomAfterAllocations' in this))
+  quit();
+
+var g = newGlobal();
+g.debuggeeGlobal = [];
+g.eval("(" + function() {
+    oomAfterAllocations(57);
+    Debugger(debuggeeGlobal).onEnterFrame = function() {}
+} + ")()");
diff --git a/js/src/jit-test/tests/for-of/non-iterable.js b/js/src/jit-test/tests/for-of/non-iterable.js
index 319e622ef1..99f6b07802 100644
--- a/js/src/jit-test/tests/for-of/non-iterable.js
+++ b/js/src/jit-test/tests/for-of/non-iterable.js
@@ -2,12 +2,9 @@
 
 load(libdir + "asserts.js");
 
-function argsobj() { return arguments; }
-
 var misc = [
     {}, {x: 1}, Math, isNaN,
     Object.create(null),
-    argsobj(0, 1, 2),
     null, undefined,
     true, 0, 3.1416,
     new Boolean(true), new Number(0),
diff --git a/js/src/jit-test/tests/gc/bug-1240503.js b/js/src/jit-test/tests/gc/bug-1240503.js
new file mode 100644
index 0000000000..0cc7cabe07
--- /dev/null
+++ b/js/src/jit-test/tests/gc/bug-1240503.js
@@ -0,0 +1,8 @@
+if (!('oomTest' in this))
+  quit();
+
+function arrayProtoOutOfRange() {
+    for (let [] = () => r, get;;)
+        var r = f(i % 2 ? a : b);
+}
+oomTest(arrayProtoOutOfRange);
diff --git a/js/src/jit-test/tests/ion/bug832058.js b/js/src/jit-test/tests/ion/bug832058.js
index e6877e5bf1..22dd37a4e0 100644
--- a/js/src/jit-test/tests/ion/bug832058.js
+++ b/js/src/jit-test/tests/ion/bug832058.js
@@ -1,4 +1,4 @@
-// |jit-test| error: TypeError
+// |jit-test|
 function f(c) {
   var b = arguments;
   if (c == 1)
@@ -9,6 +9,7 @@ function f(c) {
 evaluate("f('a', 'b', 'c', 'd', 'e');");
 function test(){
   var args = f('a', (0), 'c');
+  var s;
   for (var v of args)
       s += v;
 }
diff --git a/js/src/jit-test/tests/modules/global-scope.js b/js/src/jit-test/tests/modules/global-scope.js
new file mode 100644
index 0000000000..90a9f7026b
--- /dev/null
+++ b/js/src/jit-test/tests/modules/global-scope.js
@@ -0,0 +1,32 @@
+// Test interaction with global object and global lexical scope.
+
+function parseAndEvaluate(source) {
+    let m = parseModule(source);
+    m.declarationInstantiation();
+    return m.evaluation();
+}
+
+var x = 1;
+assertEq(parseAndEvaluate("let r = x; x = 2; r"), 1);
+assertEq(x, 2);
+
+let y = 3;
+assertEq(parseAndEvaluate("let r = y; y = 4; r"), 3);
+assertEq(y, 4);
+
+if (helperThreadCount() == 0)
+    quit();
+
+function offThreadParseAndEvaluate(source) {
+    offThreadCompileModule(source);
+    let m = finishOffThreadModule();
+    print("compiled");
+    m.declarationInstantiation();
+    return m.evaluation();
+}
+
+assertEq(offThreadParseAndEvaluate("let r = x; x = 5; r"), 2);
+assertEq(x, 5);
+
+assertEq(offThreadParseAndEvaluate("let r = y; y = 6; r"), 4);
+assertEq(y, 6);
diff --git a/js/src/jit-test/tests/modules/off-thread-compile.js b/js/src/jit-test/tests/modules/off-thread-compile.js
new file mode 100644
index 0000000000..8d17d4a749
--- /dev/null
+++ b/js/src/jit-test/tests/modules/off-thread-compile.js
@@ -0,0 +1,16 @@
+// Test off thread module compilation.
+
+if (helperThreadCount() == 0)
+    quit();
+
+load(libdir + "asserts.js");
+load(libdir + "dummyModuleResolveHook.js");
+
+function offThreadParseAndEvaluate(source) {
+    offThreadCompileModule(source);
+    let m = finishOffThreadModule();
+    m.declarationInstantiation();
+    return m.evaluation();
+}
+
+offThreadParseAndEvaluate("export let x = 2 * 3;");
diff --git a/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js b/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js
new file mode 100644
index 0000000000..2d111cc3a4
--- /dev/null
+++ b/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js
@@ -0,0 +1,9 @@
+load(libdir + "asserts.js");
+
+assertTypeErrorMessage(() => WeakSet.prototype.add.call({}), "add method called on incompatible Object");
+assertTypeErrorMessage(() => newGlobal().WeakSet.prototype.add.call({}), "add method called on incompatible Object");
+assertTypeErrorMessage(() => WeakSet.prototype.add.call(15), "add method called on incompatible number");
+
+assertTypeErrorMessage(() => Int8Array.prototype.find.call({}), "find method called on incompatible Object");
+assertTypeErrorMessage(() => newGlobal().Int8Array.prototype.find.call({}), "find method called on incompatible Object");
+assertTypeErrorMessage(() => Int8Array.prototype.find.call(15), "find method called on incompatible number");
diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp
index 792c59b8f9..c8e0f3f76a 100644
--- a/js/src/jsapi.cpp
+++ b/js/src/jsapi.cpp
@@ -963,7 +963,8 @@ JS_TransplantObject(JSContext* cx, HandleObject origobj, HandleObject target)
         MOZ_ASSERT(Wrapper::wrappedObject(newIdentityWrapper) == newIdentity);
         if (!JSObject::swap(cx, origobj, newIdentityWrapper))
             MOZ_CRASH();
-        origobj->compartment()->putWrapper(cx, CrossCompartmentKey(newIdentity), origv);
+        if (!origobj->compartment()->putWrapper(cx, CrossCompartmentKey(newIdentity), origv))
+            MOZ_CRASH();
     }
 
     // The new identity object might be one of several things. Return it to avoid
@@ -1056,15 +1057,6 @@ static const JSStdName builtin_property_names[] = {
 #if JS_HAS_UNEVAL
     { EAGER_ATOM(uneval), JSProto_String },
 #endif
-#ifdef ENABLE_SIMD
-    { EAGER_ATOM(SIMD), JSProto_SIMD },
-#endif
-#ifdef ENABLE_BINARYDATA
-    { EAGER_ATOM(TypedObject), JSProto_TypedObject },
-#endif
-#ifdef ENABLE_SHARED_ARRAY_BUFFER
-    { EAGER_ATOM(Atomics), JSProto_Atomics },
-#endif
 
     { 0, JSProto_LIMIT }
 };
@@ -1104,14 +1096,8 @@ JS_ResolveStandardClass(JSContext* cx, HandleObject obj, HandleId id, bool* reso
     if (!stdnm)
         stdnm = LookupStdName(cx->names(), idAtom, builtin_property_names);
 
-#ifdef ENABLE_SHARED_ARRAY_BUFFER
-    if (stdnm && !cx->compartment()->creationOptions().getSharedMemoryAndAtomicsEnabled() &&
-        (stdnm->atomOffset == NAME_OFFSET(Atomics) ||
-         stdnm->atomOffset == NAME_OFFSET(SharedArrayBuffer)))
-    {
+    if (stdnm && GlobalObject::skipDeselectedConstructor(cx, stdnm->key))
         stdnm = nullptr;
-    }
-#endif
 
     // If this class is anonymous, then it doesn't exist as a global
     // property, so we won't resolve anything.
@@ -1154,6 +1140,9 @@ JS_MayResolveStandardClass(const JSAtomState& names, jsid id, JSObject* maybeObj
 
     JSAtom* atom = JSID_TO_ATOM(id);
 
+    // This will return true even for deselected constructors.  (To do
+    // better, we need a JSContext here; it's fine as it is.)
+
     return atom == names.undefined ||
            LookupStdName(names, atom, standard_class_names) ||
            LookupStdName(names, atom, builtin_property_names);
@@ -1210,6 +1199,9 @@ JS_IdToProtoKey(JSContext* cx, HandleId id)
     if (!stdnm)
         return JSProto_Null;
 
+    if (GlobalObject::skipDeselectedConstructor(cx, stdnm->key))
+        return JSProto_Null;
+
     MOZ_ASSERT(MOZ_ARRAY_LENGTH(standard_class_names) == JSProto_LIMIT + 1);
     return static_cast<JSProtoKey>(stdnm - standard_class_names);
 }
@@ -3091,7 +3083,7 @@ DefineSelfHostedProperty(JSContext* cx, HandleObject obj, HandleId id,
             return false;
         }
         MOZ_ASSERT(setterValue.isObject() && setterValue.toObject().is<JSFunction>());
-        setterFunc = &getterValue.toObject().as<JSFunction>();
+        setterFunc = &setterValue.toObject().as<JSFunction>();
     }
     JSNative setterOp = JS_DATA_TO_FUNC_PTR(JSNative, setterFunc.get());
 
@@ -4153,11 +4145,11 @@ JS::FinishOffThreadScript(JSContext* maybecx, JSRuntime* rt, void* token)
         RootedScript script(maybecx);
         {
             AutoLastFrameCheck lfc(maybecx);
-            script = HelperThreadState().finishParseTask(maybecx, rt, token);
+            script = HelperThreadState().finishScriptParseTask(maybecx, rt, token);
         }
         return script;
     } else {
-        return HelperThreadState().finishParseTask(maybecx, rt, token);
+        return HelperThreadState().finishScriptParseTask(maybecx, rt, token);
     }
 }
 
diff --git a/js/src/jsarray.cpp b/js/src/jsarray.cpp
index 56a82acda9..d1608997c0 100644
--- a/js/src/jsarray.cpp
+++ b/js/src/jsarray.cpp
@@ -2343,7 +2343,11 @@ CanOptimizeForDenseStorage(HandleObject arr, uint32_t startingIndex, uint32_t co
      * visited.  See bug 690622.
      */
     ObjectGroup* arrGroup = arr->getGroup(cx);
-    if (MOZ_UNLIKELY(!arrGroup || arrGroup->hasAllFlags(OBJECT_FLAG_ITERATED)))
+    if (!arrGroup) {
+        cx->recoverFromOutOfMemory();
+        return false;
+    }
+    if (MOZ_UNLIKELY(arrGroup->hasAllFlags(OBJECT_FLAG_ITERATED)))
         return false;
 
     /*
diff --git a/js/src/jscompartment.cpp b/js/src/jscompartment.cpp
index 45edbc057b..6e5f2f3fa6 100644
--- a/js/src/jscompartment.cpp
+++ b/js/src/jscompartment.cpp
@@ -670,7 +670,7 @@ JSCompartment::sweepInnerViews()
 void
 JSCompartment::sweepSavedStacks()
 {
-    savedStacks_.sweep(runtimeFromAnyThread());
+    savedStacks_.sweep();
 }
 
 void
@@ -1078,8 +1078,9 @@ JSCompartment::clearScriptCounts()
     // Clear all hasScriptCounts_ flags of JSScript, in order to release all
     // ScriptCounts entry of the current compartment.
     for (ScriptCountsMap::Range r = scriptCountsMap->all(); !r.empty(); r.popFront()) {
-        ScriptCounts* value = &r.front().value();
+        ScriptCounts* value = r.front().value();
         r.front().key()->takeOverScriptCountsMapEntry(value);
+        js_delete(value);
     }
 
     js_delete(scriptCountsMap);
diff --git a/js/src/jscompartment.h b/js/src/jscompartment.h
index e59b7779f0..f27586287c 100644
--- a/js/src/jscompartment.h
+++ b/js/src/jscompartment.h
@@ -485,7 +485,8 @@ struct JSCompartment
         DebuggerNeedsDelazification = 1 << 4
     };
 
-    unsigned                     debugModeBits;
+    unsigned debugModeBits;
+    friend class AutoRestoreCompartmentDebugMode;
 
     static const unsigned DebuggerObservesMask = IsDebuggee |
                                                  DebuggerObservesAllExecution |
@@ -498,17 +499,18 @@ struct JSCompartment
     JSCompartment(JS::Zone* zone, const JS::CompartmentOptions& options);
     ~JSCompartment();
 
-    bool init(JSContext* maybecx);
+    MOZ_WARN_UNUSED_RESULT bool init(JSContext* maybecx);
 
-    inline bool wrap(JSContext* cx, JS::MutableHandleValue vp,
-                     JS::HandleObject existing = nullptr);
+    MOZ_WARN_UNUSED_RESULT inline bool wrap(JSContext* cx, JS::MutableHandleValue vp,
+                                            JS::HandleObject existing = nullptr);
 
-    bool wrap(JSContext* cx, js::MutableHandleString strp);
-    bool wrap(JSContext* cx, JS::MutableHandleObject obj,
-              JS::HandleObject existingArg = nullptr);
-    bool wrap(JSContext* cx, JS::MutableHandle<js::PropertyDescriptor> desc);
+    MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, js::MutableHandleString strp);
+    MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, JS::MutableHandleObject obj,
+                                     JS::HandleObject existingArg = nullptr);
+    MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, JS::MutableHandle<js::PropertyDescriptor> desc);
 
-    template<typename T> bool wrap(JSContext* cx, JS::AutoVectorRooter<T>& vec) {
+    template<typename T> MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx,
+                                                          JS::AutoVectorRooter<T>& vec) {
         for (size_t i = 0; i < vec.length(); ++i) {
             if (!wrap(cx, vec[i]))
                 return false;
@@ -516,7 +518,8 @@ struct JSCompartment
         return true;
     };
 
-    bool putWrapper(JSContext* cx, const js::CrossCompartmentKey& wrapped, const js::Value& wrapper);
+    MOZ_WARN_UNUSED_RESULT bool putWrapper(JSContext* cx, const js::CrossCompartmentKey& wrapped,
+                                           const js::Value& wrapper);
 
     js::WrapperMap::Ptr lookupWrapper(const js::Value& wrapped) const {
         return crossCompartmentWrappers.lookup(js::CrossCompartmentKey(wrapped));
diff --git a/js/src/jsobj.cpp b/js/src/jsobj.cpp
index 76d51bae68..210ff1b76c 100644
--- a/js/src/jsobj.cpp
+++ b/js/src/jsobj.cpp
@@ -3757,7 +3757,7 @@ JSObject::addSizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf, JS::ClassIn
     if (is<NativeObject>() && as<NativeObject>().hasDynamicElements()) {
         js::ObjectElements* elements = as<NativeObject>().getElementsHeader();
         if (!elements->isCopyOnWrite() || elements->ownerObject() == this)
-            info->objectsMallocHeapElementsNonAsmJS += mallocSizeOf(elements);
+            info->objectsMallocHeapElementsNormal += mallocSizeOf(elements);
     }
 
     // Other things may be measured in the future if DMD indicates it is worthwhile.
diff --git a/js/src/jsprf.cpp b/js/src/jsprf.cpp
index 0d367de047..0cdd27ddfb 100644
--- a/js/src/jsprf.cpp
+++ b/js/src/jsprf.cpp
@@ -1021,7 +1021,7 @@ JS_vsnprintf(char* out, uint32_t outlen, const char* fmt, va_list ap)
     (void) dosprintf(&ss, fmt, ap);
 
     uint32_t charsWritten = ss.cur - ss.base;
-    MOZ_ASSERT(charsWritten > 0);
+    MOZ_RELEASE_ASSERT(charsWritten > 0);
 
     // If we didn't append a null then we must have hit the buffer limit. Write
     // a null terminator now and return a value indicating that we failed.
diff --git a/js/src/jsscript.cpp b/js/src/jsscript.cpp
index 2cd21a4464..98018f7477 100644
--- a/js/src/jsscript.cpp
+++ b/js/src/jsscript.cpp
@@ -14,6 +14,7 @@
 #include "mozilla/MathAlgorithms.h"
 #include "mozilla/MemoryReporting.h"
 #include "mozilla/PodOperations.h"
+#include "mozilla/ScopeExit.h"
 #include "mozilla/Vector.h"
 
 #include <algorithm>
@@ -1343,20 +1344,26 @@ JSScript::initScriptCounts(JSContext* cx)
     jsbytecode* mainEntry = main();
     for (jsbytecode* pc = code(); pc != end; pc = GetNextPc(pc)) {
         if (pc == mainEntry) {
-            if (!jumpTargets.append(pc))
+            if (!jumpTargets.append(pc)) {
+                ReportOutOfMemory(cx);
                 return false;
+            }
         }
 
         bool jump = IsJumpOpcode(JSOp(*pc));
         if (jump) {
             jsbytecode* target = pc + GET_JUMP_OFFSET(pc);
-            if (!jumpTargets.append(target))
+            if (!jumpTargets.append(target)) {
+                ReportOutOfMemory(cx);
                 return false;
+            }
 
             if (BytecodeFallsThrough(JSOp(*pc))) {
                 jsbytecode* fallthrough = GetNextPc(pc);
-                if (!jumpTargets.append(fallthrough))
+                if (!jumpTargets.append(fallthrough)) {
+                    ReportOutOfMemory(cx);
                     return false;
+                }
             }
         }
 
@@ -1365,8 +1372,10 @@ JSScript::initScriptCounts(JSContext* cx)
             int32_t len = GET_JUMP_OFFSET(pc2);
 
             // Default target.
-            if (!jumpTargets.append(pc + len))
+            if (!jumpTargets.append(pc + len)) {
+                ReportOutOfMemory(cx);
                 return false;
+            }
 
             pc2 += JUMP_OFFSET_LEN;
             int32_t low = GET_JUMP_OFFSET(pc2);
@@ -1378,8 +1387,10 @@ JSScript::initScriptCounts(JSContext* cx)
                 int32_t off = (int32_t) GET_JUMP_OFFSET(pc2);
                 if (off) {
                     // Case (i + low)
-                    if (!jumpTargets.append(pc + off))
+                    if (!jumpTargets.append(pc + off)) {
+                        ReportOutOfMemory(cx);
                         return false;
+                    }
                 }
             }
         }
@@ -1396,8 +1407,10 @@ JSScript::initScriptCounts(JSContext* cx)
                 continue;
 
             jsbytecode* tryTarget = tryStart + tn->length;
-            if (!jumpTargets.append(tryTarget))
+            if (!jumpTargets.append(tryTarget)) {
+                ReportOutOfMemory(cx);
                 return false;
+            }
         }
     }
 
@@ -1408,8 +1421,10 @@ JSScript::initScriptCounts(JSContext* cx)
 
     // Initialize all PCCounts counters to 0.
     ScriptCounts::PCCountsVector base;
-    if (!base.reserve(jumpTargets.length()))
+    if (!base.reserve(jumpTargets.length())) {
+        ReportOutOfMemory(cx);
         return false;
+    }
 
     for (size_t i = 0; i < jumpTargets.length(); i++)
         base.infallibleEmplaceBack(pcToOffset(jumpTargets[i]));
@@ -1418,8 +1433,10 @@ JSScript::initScriptCounts(JSContext* cx)
     ScriptCountsMap* map = compartment()->scriptCountsMap;
     if (!map) {
         map = cx->new_<ScriptCountsMap>();
-        if (!map)
+        if (!map) {
+            ReportOutOfMemory(cx);
             return false;
+        }
 
         if (!map->init()) {
             js_delete(map);
@@ -1430,12 +1447,25 @@ JSScript::initScriptCounts(JSContext* cx)
         compartment()->scriptCountsMap = map;
     }
 
-    // Register the current ScriptCount in the compartment's map.
-    if (!map->putNew(this, Move(base)))
+    // Allocate the ScriptCounts.
+    ScriptCounts* sc = cx->new_<ScriptCounts>(Move(base));
+    if (!sc) {
+        ReportOutOfMemory(cx);
         return false;
+    }
+    auto guardScriptCounts = mozilla::MakeScopeExit([&] () {
+        js_delete(sc);
+    });
+
+    // Register the current ScriptCounts in the compartment's map.
+    if (!map->putNew(this, sc)) {
+        ReportOutOfMemory(cx);
+        return false;
+    }
 
     // safe to set this;  we can't fail after this point.
     hasScriptCounts_ = true;
+    guardScriptCounts.release();
 
     // Enable interrupts in any interpreter frames running on this script. This
     // is used to let the interpreter increment the PCCounts, if present.
@@ -1460,7 +1490,7 @@ ScriptCounts&
 JSScript::getScriptCounts()
 {
     ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this);
-    return p->value();
+    return *p->value();
 }
 
 js::PCCounts*
@@ -1620,7 +1650,7 @@ JSScript::takeOverScriptCountsMapEntry(ScriptCounts* entryValue)
 {
 #ifdef DEBUG
     ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this);
-    MOZ_ASSERT(entryValue == &p->value());
+    MOZ_ASSERT(entryValue == p->value());
 #endif
     hasScriptCounts_ = false;
 }
@@ -1629,7 +1659,8 @@ void
 JSScript::releaseScriptCounts(ScriptCounts* counts)
 {
     ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this);
-    *counts = Move(p->value());
+    *counts = Move(*p->value());
+    js_delete(p->value());
     compartment()->scriptCountsMap->remove(p);
     hasScriptCounts_ = false;
 }
@@ -3217,21 +3248,17 @@ js::PCToLineNumber(unsigned startLine, jssrcnote* notes, jsbytecode* code, jsbyt
     ptrdiff_t target = pc - code;
     for (jssrcnote* sn = notes; !SN_IS_TERMINATOR(sn); sn = SN_NEXT(sn)) {
         offset += SN_DELTA(sn);
-        SrcNoteType type = (SrcNoteType) SN_TYPE(sn);
-        if (type == SRC_SETLINE) {
-            if (offset <= target)
-                lineno = unsigned(GetSrcNoteOffset(sn, 0));
-            column = 0;
-        } else if (type == SRC_NEWLINE) {
-            if (offset <= target)
-                lineno++;
-            column = 0;
-        }
-
         if (offset > target)
             break;
 
-        if (type == SRC_COLSPAN) {
+        SrcNoteType type = (SrcNoteType) SN_TYPE(sn);
+        if (type == SRC_SETLINE) {
+            lineno = unsigned(GetSrcNoteOffset(sn, 0));
+            column = 0;
+        } else if (type == SRC_NEWLINE) {
+            lineno++;
+            column = 0;
+        } else if (type == SRC_COLSPAN) {
             ptrdiff_t colspan = SN_OFFSET_TO_COLSPAN(GetSrcNoteOffset(sn, 0));
             MOZ_ASSERT(ptrdiff_t(column) + colspan >= 0);
             column += colspan;
diff --git a/js/src/jsscript.h b/js/src/jsscript.h
index d9738a35bc..a4fe4c2895 100644
--- a/js/src/jsscript.h
+++ b/js/src/jsscript.h
@@ -520,7 +520,7 @@ class ScriptCounts
 // the calls to the JSScript::finalize function which are used to aggregate code
 // coverage results on the compartment.
 typedef HashMap<JSScript*,
-                ScriptCounts,
+                ScriptCounts*,
                 DefaultHasher<JSScript*>,
                 SystemAllocPolicy> ScriptCountsMap;
 
@@ -1052,11 +1052,10 @@ class JSScript : public js::gc::TenuredCell
     uint32_t        sourceStart_;
     uint32_t        sourceEnd_;
 
-    uint32_t        warmUpCount; /* Number of times the script has been called
-                                  * or has had backedges taken. When running in
-                                  * ion, also increased for any inlined scripts.
-                                  * Reset if the script's JIT code is forcibly
-                                  * discarded. */
+    // Number of times the script has been called or has had backedges taken.
+    // When running in ion, also increased for any inlined scripts. Reset if
+    // the script's JIT code is forcibly discarded.
+    mozilla::Atomic<uint32_t, mozilla::Relaxed> warmUpCount;
 
     // 16-bit fields.
 
@@ -1719,7 +1718,7 @@ class JSScript : public js::gc::TenuredCell
   public:
     uint32_t getWarmUpCount() const { return warmUpCount; }
     uint32_t incWarmUpCounter(uint32_t amount = 1) { return warmUpCount += amount; }
-    uint32_t* addressOfWarmUpCounter() { return &warmUpCount; }
+    uint32_t* addressOfWarmUpCounter() { return reinterpret_cast<uint32_t*>(&warmUpCount); }
     static size_t offsetOfWarmUpCounter() { return offsetof(JSScript, warmUpCount); }
     void resetWarmUpCounter() { incWarmUpResetCounter(); warmUpCount = 0; }
 
@@ -2143,8 +2142,8 @@ class LazyScript : public gc::TenuredCell
     // Function or block chain in which the script is nested, or nullptr.
     HeapPtrObject enclosingScope_;
 
-    // ScriptSourceObject, or nullptr if the script in which this is nested
-    // has not been compiled yet. This is never a CCW; we don't clone
+    // ScriptSourceObject. We leave this set to nullptr until we generate
+    // bytecode for our immediate parent. This is never a CCW; we don't clone
     // LazyScripts into other compartments.
     HeapPtrObject sourceObject_;
 
diff --git a/js/src/jsversion.h b/js/src/jsversion.h
index ec04292b98..81294ab24f 100644
--- a/js/src/jsversion.h
+++ b/js/src/jsversion.h
@@ -37,9 +37,8 @@
  */
 #define JS_OLD_GETTER_SETTER_METHODS    1
 
-/* Support for ES6 Classes. */
 #ifdef NIGHTLY_BUILD
-#define JS_HAS_CLASSES 1
-#endif
+
+#endif // NIGHTLY_BUILD
 
 #endif /* jsversion_h */
diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp
index a63500535a..c4a050a576 100644
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -3332,7 +3332,6 @@ ParseModule(JSContext* cx, unsigned argc, Value* vp)
         return false;
     }
 
-    RootedObject global(cx, JS::CurrentGlobalOrNull(cx));
     JSFlatString* scriptContents = args[0].toString()->ensureFlat(cx);
     if (!scriptContents)
         return false;
@@ -3364,7 +3363,7 @@ ParseModule(JSContext* cx, unsigned argc, Value* vp)
     SourceBufferHolder srcBuf(chars, scriptContents->length(),
                               SourceBufferHolder::NoOwnership);
 
-    RootedObject module(cx, frontend::CompileModule(cx, global, options, srcBuf));
+    RootedObject module(cx, frontend::CompileModule(cx, options, srcBuf));
     if (!module)
         return false;
 
@@ -3698,6 +3697,109 @@ runOffThreadScript(JSContext* cx, unsigned argc, Value* vp)
     return JS_ExecuteScript(cx, script, args.rval());
 }
 
+static bool
+CompileOffThreadModule(JSContext* cx, const ReadOnlyCompileOptions& options,
+                       const char16_t* chars, size_t length,
+                       JS::OffThreadCompileCallback callback, void* callbackData)
+{
+    MOZ_ASSERT(JS::CanCompileOffThread(cx, options, length));
+    return StartOffThreadParseModule(cx, options, chars, length, callback, callbackData);
+}
+
+static JSObject*
+FinishOffThreadModule(JSContext* maybecx, JSRuntime* rt, void* token)
+{
+    MOZ_ASSERT(CurrentThreadCanAccessRuntime(rt));
+    return HelperThreadState().finishModuleParseTask(maybecx, rt, token);
+}
+
+static bool
+OffThreadCompileModule(JSContext* cx, unsigned argc, Value* vp)
+{
+    CallArgs args = CallArgsFromVp(argc, vp);
+
+    if (args.length() != 1 || !args[0].isString()) {
+        JS_ReportErrorNumber(cx, my_GetErrorMessage, nullptr, JSSMSG_INVALID_ARGS,
+                             "offThreadCompileModule");
+        return false;
+    }
+
+    JSAutoByteString fileNameBytes;
+    CompileOptions options(cx);
+    options.setIntroductionType("js shell offThreadCompileModule")
+           .setFileAndLine("<string>", 1);
+    options.setIsRunOnce(true)
+           .setSourceIsLazy(false);
+    options.forceAsync = true;
+
+    JSString* scriptContents = args[0].toString();
+    AutoStableStringChars stableChars(cx);
+    if (!stableChars.initTwoByte(cx, scriptContents))
+        return false;
+
+    size_t length = scriptContents->length();
+    const char16_t* chars = stableChars.twoByteRange().start().get();
+
+    // Make sure we own the string's chars, so that they are not freed before
+    // the compilation is finished.
+    ScopedJSFreePtr<char16_t> ownedChars;
+    if (stableChars.maybeGiveOwnershipToCaller()) {
+        ownedChars = const_cast<char16_t*>(chars);
+    } else {
+        char16_t* copy = cx->pod_malloc<char16_t>(length);
+        if (!copy)
+            return false;
+
+        mozilla::PodCopy(copy, chars, length);
+        ownedChars = copy;
+        chars = copy;
+    }
+
+    if (!JS::CanCompileOffThread(cx, options, length)) {
+        JS_ReportError(cx, "cannot compile code on worker thread");
+        return false;
+    }
+
+    if (!offThreadState.startIfIdle(cx, ownedChars)) {
+        JS_ReportError(cx, "called offThreadCompileModule without receiving prior off-thread "
+                       "compilation");
+        return false;
+    }
+
+    if (!CompileOffThreadModule(cx, options, chars, length,
+                                OffThreadCompileScriptCallback, nullptr))
+    {
+        offThreadState.abandon(cx);
+        return false;
+    }
+
+    args.rval().setUndefined();
+    return true;
+}
+
+static bool
+FinishOffThreadModule(JSContext* cx, unsigned argc, Value* vp)
+{
+    CallArgs args = CallArgsFromVp(argc, vp);
+
+    JSRuntime* rt = cx->runtime();
+    if (OffThreadParsingMustWaitForGC(rt))
+        gc::AutoFinishGC finishgc(rt);
+
+    void* token = offThreadState.waitUntilDone(cx);
+    if (!token) {
+        JS_ReportError(cx, "called finishOffThreadModule when no compilation is pending");
+        return false;
+    }
+
+    RootedObject module(cx, FinishOffThreadModule(cx, rt, token));
+    if (!module)
+        return false;
+
+    args.rval().setObject(*module);
+    return true;
+}
+
 struct MOZ_RAII FreeOnReturn
 {
     JSContext* cx;
@@ -5057,6 +5159,16 @@ static const JSFunctionSpecWithHelp shell_functions[] = {
 "  throw the appropriate exception; otherwise, run the script and return\n"
 "  its value."),
 
+    JS_FN_HELP("offThreadCompileModule", OffThreadCompileModule, 1, 0,
+"offThreadCompileModule(code)",
+"  Compile |code| on a helper thread. To wait for the compilation to finish\n"
+"  and get the module object, call |finishOffThreadModule|."),
+
+    JS_FN_HELP("finishOffThreadModule", FinishOffThreadModule, 0, 0,
+"finishOffThreadModule()",
+"  Wait for off-thread compilation to complete. If an error occurred,\n"
+"  throw the appropriate exception; otherwise, return the module object"),
+
     JS_FN_HELP("timeout", Timeout, 1, 0,
 "timeout([seconds], [func])",
 "  Get/Set the limit in seconds for the execution time for the current context.\n"
diff --git a/js/src/tests/ecma_6/Array/iterator_edge_cases.js b/js/src/tests/ecma_6/Array/iterator_edge_cases.js
index a52d1d40e1..079245ebfb 100644
--- a/js/src/tests/ecma_6/Array/iterator_edge_cases.js
+++ b/js/src/tests/ecma_6/Array/iterator_edge_cases.js
@@ -7,7 +7,7 @@ function TestArrayIteratorPrototypeConfusion() {
         throw new Error("Call did not throw");
     } catch (e) {
         assertEq(e instanceof TypeError, true);
-        assertEq(e.message, "CallArrayIteratorMethodIfWrapped method called on incompatible Array Iterator");
+        assertEq(e.message, "next method called on incompatible Array Iterator");
     }
 }
 TestArrayIteratorPrototypeConfusion();
diff --git a/js/src/tests/ecma_6/Array/sort_holes.js b/js/src/tests/ecma_6/Array/sort_holes.js
new file mode 100644
index 0000000000..332fbce37b
--- /dev/null
+++ b/js/src/tests/ecma_6/Array/sort_holes.js
@@ -0,0 +1,72 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/ */
+
+// We should preserve holes when sorting sparce arrays.
+// See bug: 1246860
+
+function denseCount(arr) {
+    var c = 0;
+    for (var i = 0; i < arr.length; i++)
+        if (i in arr)
+            c++;
+    return c;
+}
+
+let a = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}];
+let b = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}].sort();
+let c = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}].sort((a, b) => {+a.size - +b.size});
+
+assertEq(a.length, 22);
+assertEq(denseCount(a), 2);
+assertEq(a.length, b.length);
+assertEq(b.length, c.length);
+assertEq(denseCount(a), denseCount(b));
+assertEq(denseCount(b), denseCount(c));
+
+let superSparce = new Array(5000);
+superSparce[0] = 99;
+superSparce[4000] = 0;
+superSparce[4999] = -1;
+
+assertEq(superSparce.length, 5000);
+assertEq(denseCount(superSparce), 3);
+
+superSparce.sort((a, b) => 1*a-b);
+assertEq(superSparce.length, 5000);
+assertEq(denseCount(superSparce), 3);
+assertEq(superSparce[0], -1);
+assertEq(superSparce[1], 0);
+assertEq(superSparce[2], 99);
+
+let allHoles = new Array(2600);
+assertEq(allHoles.length, 2600);
+assertEq(denseCount(allHoles), 0);
+allHoles.sort((a, b) => 1*a-b);
+assertEq(allHoles.length, 2600);
+assertEq(denseCount(allHoles), 0);
+
+let oneHole = new Array(2600);
+oneHole[1399] = {size: 27};
+assertEq(oneHole.length, 2600);
+assertEq(denseCount(oneHole), 1);
+oneHole.sort((a, b) => {+a.size - +b.size});
+assertDeepEq(oneHole[0], {size: 27});
+assertEq(oneHole.length, 2600);
+assertEq(denseCount(oneHole), 1);
+
+// Ensure that the array setter is touch touched during sorting.
+
+Object.defineProperty(Array.prototype, "0", {
+    set: (value) => {throw "Illegally touched the array's setter!"},
+    configurable: true
+});
+
+assertThrows(() => {o[1] = 11;});
+
+let o = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}];
+o.sort((a, b) => {+a.size - +b.size});
+
+delete Array.prototype["0"];
+
+if (typeof reportCompare === 'function')
+    reportCompare(0, 0);
diff --git a/js/src/tests/ecma_6/Function/arguments-iterator.js b/js/src/tests/ecma_6/Function/arguments-iterator.js
new file mode 100644
index 0000000000..5610b8a245
--- /dev/null
+++ b/js/src/tests/ecma_6/Function/arguments-iterator.js
@@ -0,0 +1,167 @@
+var BUGNUMBER = 992617;
+var summary = "Implement arguments[@@iterator].";
+
+print(BUGNUMBER + ": " + summary);
+
+// MappedArgumentsObject
+let mapped = [
+  function(a, b, c) {
+    assertEq(Symbol.iterator in arguments, true);
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function(a, b, c) {
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function(a, b, c) {
+    arguments[Symbol.iterator] = 10;
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function(a, b, c) {
+    Object.defineProperty(arguments, Symbol.iterator, {
+      value: 10, writable: true, enumerable: true, configurable: true
+    });
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function(a, b, c) {
+    assertEq(arguments[Symbol.iterator], Array.prototype[Symbol.iterator]);
+  },
+  function(a, b, c) {
+    assertEq(arguments[Symbol.iterator].name, "values");
+  },
+  function(a, b, c) {
+    var desc = Object.getOwnPropertyDescriptor(arguments, Symbol.iterator);
+    assertEq("value" in desc, true);
+    assertEq(desc.value, Array.prototype[Symbol.iterator]);
+    assertEq(desc.writable, true);
+    assertEq(desc.enumerable, false);
+    assertEq(desc.configurable, true);
+  },
+  function(a, b, c) {
+    var iter = arguments[Symbol.iterator]();
+    assertDeepEq(iter.next(), { value: 10, done: false });
+    assertDeepEq(iter.next(), { value: 20, done: false });
+    assertDeepEq(iter.next(), { value: 30, done: false });
+    assertDeepEq(iter.next(), { value: undefined, done: true });
+  },
+  function(a, b, c) {
+    assertDeepEq([...arguments], [10, 20, 30]);
+  },
+  function(a, b, c) {
+    b = 40;
+    assertDeepEq([...arguments], [10, 40, 30]);
+  },
+  function(a, b, c) {
+    arguments.length = 4;
+    assertDeepEq([...arguments], [10, 20, 30, undefined]);
+  },
+  function(a, b, c) {
+    arguments[5] = 50;
+    assertDeepEq([...arguments], [10, 20, 30]);
+  },
+  function(a, b, c) {
+    arguments[Symbol.iterator] = function*() {
+      yield 40;
+      yield 50;
+      yield 60;
+    };
+    assertDeepEq([...arguments], [40, 50, 60]);
+  },
+];
+for (let f of mapped) {
+  f(10, 20, 30);
+}
+
+var g1 = newGlobal();
+assertEq(g1.eval(`
+function f(a, b, c) {
+  return arguments[Symbol.iterator].name;
+}
+f(1, 2, 3);
+`), "values");
+
+// UnmappedArgumentsObject
+let unmapped = [
+  function([a], b, c) {
+    assertEq(Symbol.iterator in arguments, true);
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function([a], b, c) {
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function([a], b, c) {
+    arguments[Symbol.iterator] = 10;
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function([a], b, c) {
+    Object.defineProperty(arguments, Symbol.iterator, {
+      value: 10, writable: true, enumerable: true, configurable: true
+    });
+    delete arguments[Symbol.iterator];
+    assertEq(Symbol.iterator in arguments, false);
+  },
+  function([a], b, c) {
+    assertEq(arguments[Symbol.iterator], Array.prototype[Symbol.iterator]);
+  },
+  function([a], b, c) {
+    assertEq(arguments[Symbol.iterator].name, "values");
+  },
+  function([a], b, c) {
+    var desc = Object.getOwnPropertyDescriptor(arguments, Symbol.iterator);
+    assertEq("value" in desc, true);
+    assertEq(desc.value, Array.prototype[Symbol.iterator]);
+    assertEq(desc.writable, true);
+    assertEq(desc.enumerable, false);
+    assertEq(desc.configurable, true);
+  },
+  function([a], b, c) {
+    var iter = arguments[Symbol.iterator]();
+    assertDeepEq(iter.next(), { value: [10], done: false });
+    assertDeepEq(iter.next(), { value: 20, done: false });
+    assertDeepEq(iter.next(), { value: 30, done: false });
+    assertDeepEq(iter.next(), { value: undefined, done: true });
+  },
+  function([a], b, c) {
+    assertDeepEq([...arguments], [[10], 20, 30]);
+  },
+  function([a], b, c) {
+    b = 40;
+    assertDeepEq([...arguments], [[10], 20, 30]);
+  },
+  function([a], b, c) {
+    arguments.length = 4;
+    assertDeepEq([...arguments], [[10], 20, 30, undefined]);
+  },
+  function([a], b, c) {
+    arguments[5] = 50;
+    assertDeepEq([...arguments], [[10], 20, 30]);
+  },
+  function([a], b, c) {
+    arguments[Symbol.iterator] = function*() {
+      yield 40;
+      yield 50;
+      yield 60;
+    };
+    assertDeepEq([...arguments], [40, 50, 60]);
+  },
+];
+for (let f of unmapped) {
+  f([10], 20, 30);
+}
+
+var g2 = newGlobal();
+assertEq(g2.eval(`
+function f([a], b, c) {
+  return arguments[Symbol.iterator].name;
+}
+f([1], 2, 3);
+`), "values");
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
diff --git a/js/src/tests/ecma_6/String/iterator_edge_cases.js b/js/src/tests/ecma_6/String/iterator_edge_cases.js
index 2cba856909..15fe6d0fb1 100644
--- a/js/src/tests/ecma_6/String/iterator_edge_cases.js
+++ b/js/src/tests/ecma_6/String/iterator_edge_cases.js
@@ -7,7 +7,7 @@ function TestStringIteratorPrototypeConfusion() {
         throw new Error("Call did not throw");
     } catch (e) {
         assertEq(e instanceof TypeError, true);
-        assertEq(e.message, "CallStringIteratorMethodIfWrapped method called on incompatible String Iterator");
+        assertEq(e.message, "next method called on incompatible String Iterator");
     }
 }
 TestStringIteratorPrototypeConfusion();
diff --git a/js/src/vm/ArgumentsObject.cpp b/js/src/vm/ArgumentsObject.cpp
index 25becc6a5f..e409dfcb12 100644
--- a/js/src/vm/ArgumentsObject.cpp
+++ b/js/src/vm/ArgumentsObject.cpp
@@ -328,6 +328,8 @@ ArgumentsObject::obj_delProperty(JSContext* cx, HandleObject obj, HandleId id,
         argsobj.markLengthOverridden();
     } else if (JSID_IS_ATOM(id, cx->names().callee)) {
         argsobj.as<MappedArgumentsObject>().clearCallee();
+    } else if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) {
+        argsobj.markIteratorOverridden();
     }
     return result.succeed();
 }
@@ -398,11 +400,34 @@ MappedArgSetter(JSContext* cx, HandleObject obj, HandleId id, MutableHandleValue
            NativeDefineProperty(cx, argsobj, id, vp, nullptr, nullptr, attrs, result);
 }
 
+static bool
+DefineArgumentsIterator(JSContext* cx, Handle<ArgumentsObject*> argsobj)
+{
+    RootedId iteratorId(cx, SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator));
+    HandlePropertyName shName = cx->names().ArrayValues;
+    RootedAtom name(cx, cx->names().values);
+    RootedValue val(cx);
+    if (!GlobalObject::getSelfHostedFunction(cx, cx->global(), shName, name, 0, &val))
+        return false;
+
+    return NativeDefineProperty(cx, argsobj, iteratorId, val, nullptr, nullptr, JSPROP_RESOLVING);
+}
+
 /* static */ bool
 MappedArgumentsObject::obj_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp)
 {
     Rooted<MappedArgumentsObject*> argsobj(cx, &obj->as<MappedArgumentsObject>());
 
+    if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) {
+        if (argsobj->hasOverriddenIterator())
+            return true;
+
+        if (!DefineArgumentsIterator(cx, argsobj))
+            return false;
+        *resolvedp = true;
+        return true;
+    }
+
     unsigned attrs = JSPROP_SHARED | JSPROP_SHADOWABLE | JSPROP_RESOLVING;
     if (JSID_IS_INT(id)) {
         uint32_t arg = uint32_t(JSID_TO_INT(id));
@@ -448,6 +473,10 @@ MappedArgumentsObject::obj_enumerate(JSContext* cx, HandleObject obj)
     if (!HasProperty(cx, argsobj, id, &found))
         return false;
 
+    id = SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator);
+    if (!HasProperty(cx, argsobj, id, &found))
+        return false;
+
     for (unsigned i = 0; i < argsobj->initialLength(); i++) {
         id = INT_TO_JSID(i);
         if (!HasProperty(cx, argsobj, id, &found))
@@ -519,6 +548,16 @@ UnmappedArgumentsObject::obj_resolve(JSContext* cx, HandleObject obj, HandleId i
 {
     Rooted<UnmappedArgumentsObject*> argsobj(cx, &obj->as<UnmappedArgumentsObject>());
 
+    if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) {
+        if (argsobj->hasOverriddenIterator())
+            return true;
+
+        if (!DefineArgumentsIterator(cx, argsobj))
+            return false;
+        *resolvedp = true;
+        return true;
+    }
+
     unsigned attrs = JSPROP_SHARED | JSPROP_SHADOWABLE;
     GetterOp getter = UnmappedArgGetter;
     SetterOp setter = UnmappedArgSetter;
@@ -570,6 +609,10 @@ UnmappedArgumentsObject::obj_enumerate(JSContext* cx, HandleObject obj)
     if (!HasProperty(cx, argsobj, id, &found))
         return false;
 
+    id = SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator);
+    if (!HasProperty(cx, argsobj, id, &found))
+        return false;
+
     for (unsigned i = 0; i < argsobj->initialLength(); i++) {
         id = INT_TO_JSID(i);
         if (!HasProperty(cx, argsobj, id, &found))
diff --git a/js/src/vm/ArgumentsObject.h b/js/src/vm/ArgumentsObject.h
index 0eecff6d6c..636b7a79b7 100644
--- a/js/src/vm/ArgumentsObject.h
+++ b/js/src/vm/ArgumentsObject.h
@@ -109,8 +109,9 @@ static const unsigned ARGS_LENGTH_MAX = 500 * 1000;
  *
  *   INITIAL_LENGTH_SLOT
  *     Stores the initial value of arguments.length, plus a bit indicating
- *     whether arguments.length has been modified.  Use initialLength() and
- *     hasOverriddenLength() to access these values.  If arguments.length has
+ *     whether arguments.length and/or arguments[@@iterator] have been
+ *     modified.  Use initialLength(), hasOverriddenLength(), and
+ *     hasOverriddenIterator() to access these values.  If arguments.length has
  *     been modified, then the current value of arguments.length is stored in
  *     another slot associated with a new property.
  *   DATA_SLOT
@@ -125,7 +126,8 @@ class ArgumentsObject : public NativeObject
 
   public:
     static const uint32_t LENGTH_OVERRIDDEN_BIT = 0x1;
-    static const uint32_t PACKED_BITS_COUNT = 1;
+    static const uint32_t ITERATOR_OVERRIDDEN_BIT = 0x2;
+    static const uint32_t PACKED_BITS_COUNT = 2;
 
   protected:
     template <typename CopyArgs>
@@ -185,6 +187,18 @@ class ArgumentsObject : public NativeObject
         setFixedSlot(INITIAL_LENGTH_SLOT, Int32Value(v));
     }
 
+    /* True iff arguments[@@iterator] has been assigned or its attributes
+     * changed. */
+    bool hasOverriddenIterator() const {
+        const Value& v = getFixedSlot(INITIAL_LENGTH_SLOT);
+        return v.toInt32() & ITERATOR_OVERRIDDEN_BIT;
+    }
+
+    void markIteratorOverridden() {
+        uint32_t v = getFixedSlot(INITIAL_LENGTH_SLOT).toInt32() | ITERATOR_OVERRIDDEN_BIT;
+        setFixedSlot(INITIAL_LENGTH_SLOT, Int32Value(v));
+    }
+
     /*
      * Because the arguments object is a real object, its elements may be
      * deleted. This is implemented by setting a 'deleted' flag for the arg
diff --git a/js/src/vm/ArrayBufferObject.cpp b/js/src/vm/ArrayBufferObject.cpp
index a82a855c0f..beedc9c385 100644
--- a/js/src/vm/ArrayBufferObject.cpp
+++ b/js/src/vm/ArrayBufferObject.cpp
@@ -576,16 +576,17 @@ ArrayBufferObject::setDataPointer(BufferContents contents, OwnsState ownsData)
     setFlags((flags() & ~KIND_MASK) | contents.kind());
 }
 
-size_t
+uint32_t
 ArrayBufferObject::byteLength() const
 {
-    return size_t(getSlot(BYTE_LENGTH_SLOT).toDouble());
+    return getSlot(BYTE_LENGTH_SLOT).toInt32();
 }
 
 void
-ArrayBufferObject::setByteLength(size_t length)
+ArrayBufferObject::setByteLength(uint32_t length)
 {
-    setSlot(BYTE_LENGTH_SLOT, DoubleValue(length));
+    MOZ_ASSERT(length <= INT32_MAX);
+    setSlot(BYTE_LENGTH_SLOT, Int32Value(length));
 }
 
 uint32_t
@@ -755,10 +756,10 @@ ArrayBufferObject::addSizeOfExcludingThis(JSObject* obj, mozilla::MallocSizeOf m
 
     switch (buffer.bufferKind()) {
       case PLAIN:
-        info->objectsMallocHeapElementsNonAsmJS += mallocSizeOf(buffer.dataPointer());
+        info->objectsMallocHeapElementsNormal += mallocSizeOf(buffer.dataPointer());
         break;
       case MAPPED:
-        info->objectsNonHeapElementsMapped += buffer.byteLength();
+        info->objectsNonHeapElementsNormal += buffer.byteLength();
         break;
       case WASM_MALLOCED:
         info->objectsMallocHeapElementsAsmJS += mallocSizeOf(buffer.dataPointer());
diff --git a/js/src/vm/ArrayBufferObject.h b/js/src/vm/ArrayBufferObject.h
index 49cb887cdf..68f8a99db5 100644
--- a/js/src/vm/ArrayBufferObject.h
+++ b/js/src/vm/ArrayBufferObject.h
@@ -288,7 +288,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared
   public:
     uint8_t* dataPointer() const;
     SharedMem<uint8_t*> dataPointerShared() const;
-    size_t byteLength() const;
+    uint32_t byteLength() const;
     BufferContents contents() const {
         return BufferContents(dataPointer(), bufferKind());
     }
@@ -339,7 +339,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared
 
   protected:
     void setDataPointer(BufferContents contents, OwnsState ownsState);
-    void setByteLength(size_t length);
+    void setByteLength(uint32_t length);
 
     uint32_t flags() const;
     void setFlags(uint32_t flags);
diff --git a/js/src/vm/CallNonGenericMethod.cpp b/js/src/vm/CallNonGenericMethod.cpp
index 157f348e21..131c447421 100644
--- a/js/src/vm/CallNonGenericMethod.cpp
+++ b/js/src/vm/CallNonGenericMethod.cpp
@@ -11,6 +11,7 @@
 
 #include "proxy/Proxy.h"
 #include "vm/ProxyObject.h"
+#include "vm/SelfHosting.h"
 
 using namespace js;
 
@@ -27,6 +28,9 @@ JS::detail::CallMethodIfWrapped(JSContext* cx, IsAcceptableThis test, NativeImpl
             return Proxy::nativeCall(cx, test, impl, args);
     }
 
+    if (IsCallSelfHostedNonGenericMethod(impl))
+        return ReportIncompatibleSelfHostedMethod(cx, args);
+
     ReportIncompatible(cx, args);
     return false;
 }
diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h
index 4027ed06f3..8587a874cb 100644
--- a/js/src/vm/CommonPropertyNames.h
+++ b/js/src/vm/CommonPropertyNames.h
@@ -296,17 +296,15 @@
     macro(iterator, iterator, "iterator") \
     macro(match, match, "match") \
     macro(species, species, "species") \
-    macro(hasInstance, hasInstance, "hasInstance") \
     macro(toPrimitive, toPrimitive, "toPrimitive") \
     macro(unscopables, unscopables, "unscopables") \
     /* Same goes for the descriptions of the well-known symbols. */ \
     macro(Symbol_create, Symbol_create, "Symbol.create") \
-    macro(Symbol_isConcatSpreadable, Symbol_isConcatSpreadable, "Symbol.isConcatSpreadable") \
-    macro(Symbol_isRegExp, Symbol_isRegExp, "Symbol.isRegExp") \
-    macro(Symbol_iterator, Symbol_iterator, "Symbol.iterator") \
-    macro(Symbol_match, Symbol_match, "Symbol.match") \
-    macro(Symbol_species,  Symbol_species,  "Symbol.species") \
     macro(Symbol_hasInstance, Symbol_hasInstance, "Symbol.hasInstance") \
+    macro(Symbol_isConcatSpreadable, Symbol_isConcatSpreadable, "Symbol.isConcatSpreadable") \
+    macro(Symbol_iterator, Symbol_iterator, "Symbol.iterator") \
+    macro(Symbol_match,    Symbol_match,    "Symbol.match") \
+    macro(Symbol_species,  Symbol_species,  "Symbol.species") \
     macro(Symbol_toPrimitive, Symbol_toPrimitive, "Symbol.toPrimitive") \
     macro(Symbol_unscopables, Symbol_unscopables, "Symbol.unscopables") \
     /* Function names for properties named by symbols. */ \
diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp
index 7acf84f58d..ba457ea451 100644
--- a/js/src/vm/Debugger.cpp
+++ b/js/src/vm/Debugger.cpp
@@ -20,6 +20,7 @@
 #include "jswrapper.h"
 
 #include "frontend/BytecodeCompiler.h"
+#include "frontend/Parser.h"
 #include "gc/Marking.h"
 #include "jit/BaselineDebugModeOSR.h"
 #include "jit/BaselineJIT.h"
@@ -248,6 +249,28 @@ class Debugger::FrameRange
     }
 };
 
+class AutoRestoreCompartmentDebugMode
+{
+    JSCompartment* comp_;
+    unsigned bits_;
+
+  public:
+    explicit AutoRestoreCompartmentDebugMode(JSCompartment* comp)
+      : comp_(comp), bits_(comp->debugModeBits)
+    {
+        MOZ_ASSERT(comp_);
+    }
+
+    ~AutoRestoreCompartmentDebugMode() {
+        if (comp_)
+            comp_->debugModeBits = bits_;
+    }
+
+    void release() {
+        comp_ = nullptr;
+    }
+};
+
 // Given a Debugger instance dbg, if it is enabled, prevents all its debuggee
 // compartments from executing scripts. Attempts to run script will throw an
 // instance of Debugger.DebuggeeWouldRun from the topmost locked Debugger's
@@ -2295,27 +2318,6 @@ UpdateExecutionObservabilityOfScriptsInZone(JSContext* cx, Zone* zone,
     JSRuntime* rt = cx->runtime();
     FreeOp* fop = cx->runtime()->defaultFreeOp();
 
-    // Mark active baseline scripts in the observable set so that they don't
-    // get discarded. They will be recompiled.
-    for (JitActivationIterator actIter(rt); !actIter.done(); ++actIter) {
-        if (actIter->compartment()->zone() != zone)
-            continue;
-
-        for (JitFrameIterator iter(actIter); !iter.done(); ++iter) {
-            switch (iter.type()) {
-              case JitFrame_BaselineJS:
-                MarkBaselineScriptActiveIfObservable(iter.script(), obs);
-                break;
-              case JitFrame_IonJS:
-                MarkBaselineScriptActiveIfObservable(iter.script(), obs);
-                for (InlineFrameIterator inlineIter(rt, &iter); inlineIter.more(); ++inlineIter)
-                    MarkBaselineScriptActiveIfObservable(inlineIter.script(), obs);
-                break;
-              default:;
-            }
-        }
-    }
-
     Vector<JSScript*> scripts(cx);
 
     // Iterate through observable scripts, invalidating their Ion scripts and
@@ -2340,6 +2342,30 @@ UpdateExecutionObservabilityOfScriptsInZone(JSContext* cx, Zone* zone,
         }
     }
 
+    // Code below this point must be infallible to ensure the active bit of
+    // BaselineScripts is in a consistent state.
+    //
+    // Mark active baseline scripts in the observable set so that they don't
+    // get discarded. They will be recompiled.
+    for (JitActivationIterator actIter(rt); !actIter.done(); ++actIter) {
+        if (actIter->compartment()->zone() != zone)
+            continue;
+
+        for (JitFrameIterator iter(actIter); !iter.done(); ++iter) {
+            switch (iter.type()) {
+              case JitFrame_BaselineJS:
+                MarkBaselineScriptActiveIfObservable(iter.script(), obs);
+                break;
+              case JitFrame_IonJS:
+                MarkBaselineScriptActiveIfObservable(iter.script(), obs);
+                for (InlineFrameIterator inlineIter(rt, &iter); inlineIter.more(); ++inlineIter)
+                    MarkBaselineScriptActiveIfObservable(inlineIter.script(), obs);
+                break;
+              default:;
+            }
+        }
+    }
+
     // Iterate through the scripts again and finish discarding
     // BaselineScripts. This must be done as a separate phase as we can only
     // discard the BaselineScript on scripts that have no IonScript.
@@ -2475,11 +2501,16 @@ Debugger::updateObservesAllExecutionOnDebuggees(JSContext* cx, IsObserving obser
         // so add the compartment to the set only if we are observing.
         if (observing && !obs.add(comp))
             return false;
-
-        comp->updateDebuggerObservesAllExecution();
     }
 
-    return updateExecutionObservability(cx, obs, observing);
+    if (!updateExecutionObservability(cx, obs, observing))
+        return false;
+
+    typedef ExecutionObservableCompartments::CompartmentRange CompartmentRange;
+    for (CompartmentRange r = obs.compartments()->all(); !r.empty(); r.popFront())
+        r.front()->updateDebuggerObservesAllExecution();
+
+    return true;
 }
 
 bool
@@ -3039,10 +3070,14 @@ Debugger::setHookImpl(JSContext* cx, CallArgs& args, Debugger& dbg, Hook which)
         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_NOT_CALLABLE_OR_UNDEFINED);
         return false;
     }
-    dbg.object->setReservedSlot(JSSLOT_DEBUG_HOOK_START + which, args[0]);
+    uint32_t slot = JSSLOT_DEBUG_HOOK_START + which;
+    RootedValue oldHook(cx, dbg.object->getReservedSlot(slot));
+    dbg.object->setReservedSlot(slot, args[0]);
     if (hookObservesAllExecution(which)) {
-        if (!dbg.updateObservesAllExecutionOnDebuggees(cx, dbg.observesAllExecution()))
+        if (!dbg.updateObservesAllExecutionOnDebuggees(cx, dbg.observesAllExecution())) {
+            dbg.object->setReservedSlot(slot, oldHook);
             return false;
+        }
     }
     args.rval().setUndefined();
     return true;
@@ -3677,6 +3712,7 @@ Debugger::addDebuggeeGlobal(JSContext* cx, Handle<GlobalObject*> global)
     });
 
     // (6)
+    AutoRestoreCompartmentDebugMode debugModeGuard(debuggeeCompartment);
     debuggeeCompartment->setIsDebuggee();
     debuggeeCompartment->updateDebuggerObservesAsmJS();
     debuggeeCompartment->updateDebuggerObservesCoverage();
@@ -3688,6 +3724,7 @@ Debugger::addDebuggeeGlobal(JSContext* cx, Handle<GlobalObject*> global)
     zoneDebuggersGuard.release();
     debuggeeZonesGuard.release();
     allocationsTrackingGuard.release();
+    debugModeGuard.release();
     return true;
 }
 
@@ -4690,6 +4727,56 @@ Debugger::endTraceLogger(JSContext* cx, unsigned argc, Value* vp)
     return true;
 }
 
+bool
+Debugger::isCompilableUnit(JSContext* cx, unsigned argc, Value* vp)
+{
+    CallArgs args = CallArgsFromVp(argc, vp);
+
+    if (!args.requireAtLeast(cx, "Debugger.isCompilableUnit", 1))
+        return false;
+
+    if (!args[0].isString()) {
+        JS_ReportErrorNumber(cx, GetErrorMessage, nullptr,
+                             JSMSG_NOT_EXPECTED_TYPE, "Debugger.isCompilableUnit",
+                             "string", InformalValueTypeName(args[0]));
+        return false;
+    }
+
+    JSString* str = args[0].toString();
+    size_t length = GetStringLength(str);
+
+    AutoStableStringChars chars(cx);
+    if (!chars.initTwoByte(cx, str))
+        return false;
+
+    bool result = true;
+
+    CompileOptions options(cx);
+    frontend::Parser<frontend::FullParseHandler> parser(cx, &cx->tempLifoAlloc(),
+                                                        options, chars.twoByteChars(),
+                                                        length, /* foldConstants = */ true,
+                                                        nullptr, nullptr);
+    JSErrorReporter older = JS_SetErrorReporter(cx->runtime(), nullptr);
+    if (!parser.checkOptions() || !parser.parse()) {
+        // We ran into an error. If it was because we ran out of memory we report
+        // it in the usual way.
+        if (cx->isThrowingOutOfMemory()) {
+            JS_SetErrorReporter(cx->runtime(), older);
+            return false;
+        }
+
+        // If it was because we ran out of source, we return false so our caller
+        // knows to try to collect more [source].
+        if (parser.isUnexpectedEOF())
+            result = false;
+
+        cx->clearPendingException();
+    }
+    JS_SetErrorReporter(cx->runtime(), older);
+    args.rval().setBoolean(result);
+    return true;
+}
+
 bool
 Debugger::drainTraceLoggerScriptCalls(JSContext* cx, unsigned argc, Value* vp)
 {
@@ -4811,7 +4898,11 @@ const JSFunctionSpec Debugger::methods[] = {
     JS_FS_END
 };
 
-
+const JSFunctionSpec Debugger::static_methods[] {
+    JS_FN("isCompilableUnit", Debugger::isCompilableUnit, 1, 0),
+    JS_FS_END
+};
+
 /*** Debugger.Script *****************************************************************************/
 
 static inline JSScript*
@@ -5581,6 +5672,19 @@ Debugger::observesScript(JSScript* script) const
 Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePtr to,
                            ScriptFrameIter& iter)
 {
+    auto removeFromDebuggerFramesOnExit = MakeScopeExit([&] {
+        // Remove any remaining old entries on exit, as the 'from' frame will
+        // be gone. On success, the range will be empty.
+        for (Debugger::FrameRange r(from); !r.empty(); r.popFront()) {
+            r.frontFrame()->setPrivate(nullptr);
+            r.removeFrontFrame();
+        }
+
+        // Rekey missingScopes to maintain Debugger.Environment identity and
+        // forward liveScopes to point to the new frame.
+        DebugScopes::forwardLiveFrame(cx, from, to);
+    });
+
     // Forward live Debugger.Frame objects.
     for (Debugger::FrameRange r(from); !r.empty(); r.popFront()) {
         RootedNativeObject frameobj(cx, r.frontFrame());
@@ -5594,7 +5698,7 @@ Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePt
             return false;
         frameobj->setPrivate(data);
 
-        // Remove the old entry before mutating the HashMap.
+        // Remove the old frame.
         r.removeFrontFrame();
 
         // Add the frame object with |to| as key.
@@ -5604,11 +5708,6 @@ Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePt
         }
     }
 
-    // Rekey missingScopes to maintain Debugger.Environment identity and
-    // forward liveScopes to point to the new frame, as the old frame will be
-    // gone.
-    DebugScopes::forwardLiveFrame(cx, from, to);
-
     return true;
 }
 
@@ -8589,8 +8688,8 @@ JS_DefineDebuggerObject(JSContext* cx, HandleObject obj)
         return false;
     debugProto = InitClass(cx, obj,
                            objProto, &Debugger::jsclass, Debugger::construct,
-                           1, Debugger::properties, Debugger::methods, nullptr, nullptr,
-                           debugCtor.address());
+                           1, Debugger::properties, Debugger::methods, nullptr,
+                           Debugger::static_methods, debugCtor.address());
     if (!debugProto)
         return false;
 
diff --git a/js/src/vm/Debugger.h b/js/src/vm/Debugger.h
index d85c86dc2f..87c74626e0 100644
--- a/js/src/vm/Debugger.h
+++ b/js/src/vm/Debugger.h
@@ -585,6 +585,7 @@ class Debugger : private mozilla::LinkedListElement<Debugger>
     static bool drainTraceLoggerScriptCalls(JSContext* cx, unsigned argc, Value* vp);
     static bool startTraceLogger(JSContext* cx, unsigned argc, Value* vp);
     static bool endTraceLogger(JSContext* cx, unsigned argc, Value* vp);
+    static bool isCompilableUnit(JSContext* cx, unsigned argc, Value* vp);
 #ifdef NIGHTLY_BUILD
     static bool setupTraceLogger(JSContext* cx, unsigned argc, Value* vp);
     static bool drainTraceLogger(JSContext* cx, unsigned argc, Value* vp);
@@ -592,6 +593,7 @@ class Debugger : private mozilla::LinkedListElement<Debugger>
     static bool construct(JSContext* cx, unsigned argc, Value* vp);
     static const JSPropertySpec properties[];
     static const JSFunctionSpec methods[];
+    static const JSFunctionSpec static_methods[];
 
     static void removeFromFrameMapsAndClearBreakpointsIn(JSContext* cx, AbstractFramePtr frame);
     static bool updateExecutionObservabilityOfFrames(JSContext* cx, const ExecutionObservableSet& obs,
diff --git a/js/src/vm/GlobalObject.cpp b/js/src/vm/GlobalObject.cpp
index e24584be05..554f15fe98 100644
--- a/js/src/vm/GlobalObject.cpp
+++ b/js/src/vm/GlobalObject.cpp
@@ -90,8 +90,22 @@ js::GlobalObject::getTypedObjectModule() const {
     return v.toObject().as<TypedObjectModuleObject>();
 }
 
-
-
+/* static */ bool
+GlobalObject::skipDeselectedConstructor(JSContext* cx, JSProtoKey key)
+{
+#ifdef ENABLE_SHARED_ARRAY_BUFFER
+    // Return true if the given constructor has been disabled at run-time.
+    switch (key) {
+      case JSProto_Atomics:
+      case JSProto_SharedArrayBuffer:
+        return !cx->compartment()->creationOptions().getSharedMemoryAndAtomicsEnabled();
+      default:
+        return false;
+    }
+#else
+    return false;
+#endif
+}
 
 /* static */ bool
 GlobalObject::ensureConstructor(JSContext* cx, Handle<GlobalObject*> global, JSProtoKey key)
@@ -118,6 +132,9 @@ GlobalObject::resolveConstructor(JSContext* cx, Handle<GlobalObject*> global, JS
     if (!init && !clasp)
         return true;  // JSProto_Null or a compile-time-disabled feature.
 
+    if (skipDeselectedConstructor(cx, key))
+        return true;
+
     // Some classes have no init routine, which means that they're disabled at
     // compile-time. We could try to enforce that callers never pass such keys
     // to resolveConstructor, but that would cramp the style of consumers like
@@ -417,6 +434,7 @@ GlobalObject::initSelfHostingBuiltins(JSContext* cx, Handle<GlobalObject*> globa
     return InitBareBuiltinCtor(cx, global, JSProto_Array) &&
            InitBareBuiltinCtor(cx, global, JSProto_TypedArray) &&
            InitBareBuiltinCtor(cx, global, JSProto_Uint8Array) &&
+           InitBareBuiltinCtor(cx, global, JSProto_Int32Array) &&
            InitBareWeakMapCtor(cx, global) &&
            InitStopIterationClass(cx, global) &&
            InitSelfHostingCollectionIteratorFunctions(cx, global) &&
@@ -661,7 +679,10 @@ GlobalObject::getSelfHostedFunction(JSContext* cx, Handle<GlobalObject*> global,
                                     HandlePropertyName selfHostedName, HandleAtom name,
                                     unsigned nargs, MutableHandleValue funVal)
 {
-    if (GlobalObject::maybeGetIntrinsicValue(cx, global, selfHostedName, funVal)) {
+    bool exists = false;
+    if (!GlobalObject::maybeGetIntrinsicValue(cx, global, selfHostedName, funVal, &exists))
+        return false;
+    if (exists) {
         RootedFunction fun(cx, &funVal.toObject().as<JSFunction>());
         if (fun->atom() == name)
             return true;
@@ -725,3 +746,18 @@ GlobalObject::addIntrinsicValue(JSContext* cx, Handle<GlobalObject*> global,
     holder->setSlot(shape->slot(), value);
     return true;
 }
+
+/* static */ bool
+GlobalObject::ensureModulePrototypesCreated(JSContext *cx, Handle<GlobalObject*> global)
+{
+    if (global->getSlot(MODULE_PROTO).isUndefined()) {
+        MOZ_ASSERT(global->getSlot(IMPORT_ENTRY_PROTO).isUndefined() &&
+                   global->getSlot(EXPORT_ENTRY_PROTO).isUndefined());
+        if (!js::InitModuleClasses(cx, global))
+            return false;
+    }
+    MOZ_ASSERT(global->getSlot(MODULE_PROTO).isObject() &&
+               global->getSlot(IMPORT_ENTRY_PROTO).isObject() &&
+               global->getSlot(EXPORT_ENTRY_PROTO).isObject());
+    return true;
+}
diff --git a/js/src/vm/GlobalObject.h b/js/src/vm/GlobalObject.h
index a7f9ceca6f..eb64ca5fb0 100644
--- a/js/src/vm/GlobalObject.h
+++ b/js/src/vm/GlobalObject.h
@@ -164,6 +164,7 @@ class GlobalObject : public NativeObject
         MOZ_ASSERT(key <= JSProto_LIMIT);
         return getSlot(APPLICATION_SLOTS + key);
     }
+    static bool skipDeselectedConstructor(JSContext* cx, JSProtoKey key);
     static bool ensureConstructor(JSContext* cx, Handle<GlobalObject*> global, JSProtoKey key);
     static bool resolveConstructor(JSContext* cx, Handle<GlobalObject*> global, JSProtoKey key);
     static bool initBuiltinConstructor(JSContext* cx, Handle<GlobalObject*> global,
@@ -467,16 +468,39 @@ class GlobalObject : public NativeObject
         return getOrCreateObject(cx, DATE_TIME_FORMAT_PROTO, initDateTimeFormatProto);
     }
 
+    static bool ensureModulePrototypesCreated(JSContext *cx, Handle<GlobalObject*> global);
+
+    JSObject* maybeGetModulePrototype() {
+        Value value = getSlot(MODULE_PROTO);
+        return value.isUndefined() ? nullptr : &value.toObject();
+    }
+
+    JSObject* maybeGetImportEntryPrototype() {
+        Value value = getSlot(IMPORT_ENTRY_PROTO);
+        return value.isUndefined() ? nullptr : &value.toObject();
+    }
+
+    JSObject* maybeGetExportEntryPrototype() {
+        Value value = getSlot(EXPORT_ENTRY_PROTO);
+        return value.isUndefined() ? nullptr : &value.toObject();
+    }
+
     JSObject* getModulePrototype() {
-        return &getSlot(MODULE_PROTO).toObject();
+        JSObject* proto = maybeGetModulePrototype();
+        MOZ_ASSERT(proto);
+        return proto;
     }
 
     JSObject* getImportEntryPrototype() {
-        return &getSlot(IMPORT_ENTRY_PROTO).toObject();
+        JSObject* proto = maybeGetImportEntryPrototype();
+        MOZ_ASSERT(proto);
+        return proto;
     }
 
     JSObject* getExportEntryPrototype() {
-        return &getSlot(EXPORT_ENTRY_PROTO).toObject();
+        JSObject* proto = maybeGetExportEntryPrototype();
+        MOZ_ASSERT(proto);
+        return proto;
     }
 
     static JSFunction*
@@ -600,7 +624,7 @@ class GlobalObject : public NativeObject
 
     static bool
     maybeGetIntrinsicValue(JSContext* cx, Handle<GlobalObject*> global, Handle<PropertyName*> name,
-                           MutableHandleValue vp)
+                           MutableHandleValue vp, bool* exists)
     {
         NativeObject* holder = getIntrinsicsHolder(cx, global);
         if (!holder)
@@ -608,15 +632,21 @@ class GlobalObject : public NativeObject
 
         if (Shape* shape = holder->lookupPure(name)) {
             vp.set(holder->getSlot(shape->slot()));
-            return true;
+            *exists = true;
+        } else {
+            *exists = false;
         }
-        return false;
+
+        return true;
     }
 
     static bool getIntrinsicValue(JSContext* cx, Handle<GlobalObject*> global,
                                   HandlePropertyName name, MutableHandleValue value)
     {
-        if (GlobalObject::maybeGetIntrinsicValue(cx, global, name, value))
+        bool exists = false;
+        if (!GlobalObject::maybeGetIntrinsicValue(cx, global, name, value, &exists))
+            return false;
+        if (exists)
             return true;
         if (!cx->runtime()->cloneSelfHostedValue(cx, name, value))
             return false;
diff --git a/js/src/vm/HelperThreads.cpp b/js/src/vm/HelperThreads.cpp
index b3203a98e3..b7030aa8c8 100644
--- a/js/src/vm/HelperThreads.cpp
+++ b/js/src/vm/HelperThreads.cpp
@@ -195,10 +195,10 @@ static const JSClass parseTaskGlobalClass = {
     JS_GlobalObjectTraceHook
 };
 
-ParseTask::ParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, JSContext* initCx,
-                     const char16_t* chars, size_t length,
+ParseTask::ParseTask(ParseTaskKind kind, ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+                     JSContext* initCx, const char16_t* chars, size_t length,
                      JS::OffThreadCompileCallback callback, void* callbackData)
-  : cx(cx), options(initCx), chars(chars), length(length),
+  : kind(kind), cx(cx), options(initCx), chars(chars), length(length),
     alloc(JSRuntime::TEMP_LIFO_ALLOC_PRIMARY_CHUNK_SIZE),
     exclusiveContextGlobal(initCx->runtime(), exclusiveContextGlobal),
     callback(callback), callbackData(callbackData),
@@ -244,6 +244,52 @@ ParseTask::~ParseTask()
         js_delete(errors[i]);
 }
 
+ScriptParseTask::ScriptParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+                                 JSContext* initCx, const char16_t* chars, size_t length,
+                                 JS::OffThreadCompileCallback callback, void* callbackData)
+  : ParseTask(ParseTaskKind::Script, cx, exclusiveContextGlobal, initCx, chars, length, callback,
+              callbackData)
+{
+}
+
+void
+ScriptParseTask::parse()
+{
+    SourceBufferHolder srcBuf(chars, length, SourceBufferHolder::NoOwnership);
+
+    // ! WARNING WARNING WARNING !
+    //
+    // See comment in Parser::bindLexical about optimizing global lexical
+    // bindings. If we start optimizing them, passing in task->cx's
+    // global lexical scope would be incorrect!
+    //
+    // ! WARNING WARNING WARNING !
+    Rooted<ClonedBlockObject*> globalLexical(cx, &cx->global()->lexicalScope());
+    Rooted<StaticScope*> staticScope(cx, &globalLexical->staticBlock());
+    script = frontend::CompileScript(cx, &alloc, globalLexical, staticScope, nullptr,
+                                     options, srcBuf,
+                                     /* source_ = */ nullptr,
+                                     /* extraSct = */ nullptr,
+                                     /* sourceObjectOut = */ sourceObject.address());
+}
+
+ModuleParseTask::ModuleParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+                                 JSContext* initCx, const char16_t* chars, size_t length,
+                                 JS::OffThreadCompileCallback callback, void* callbackData)
+  : ParseTask(ParseTaskKind::Module, cx, exclusiveContextGlobal, initCx, chars, length, callback,
+              callbackData)
+{
+}
+
+void
+ModuleParseTask::parse()
+{
+    SourceBufferHolder srcBuf(chars, length, SourceBufferHolder::NoOwnership);
+    ModuleObject* module = frontend::CompileModule(cx, options, srcBuf, &alloc);
+    if (module)
+        script = module->script();
+}
+
 void
 js::CancelOffThreadParses(JSRuntime* rt)
 {
@@ -285,7 +331,8 @@ js::CancelOffThreadParses(JSRuntime* rt)
             if (task->runtimeMatches(rt)) {
                 found = true;
                 AutoUnlockHelperThreadState unlock;
-                HelperThreadState().finishParseTask(/* maybecx = */ nullptr, rt, task);
+                HelperThreadState().finishParseTask(/* maybecx = */ nullptr, rt, task->kind,
+                                                    task);
             }
         }
         if (!found)
@@ -319,7 +366,7 @@ EnsureConstructor(JSContext* cx, Handle<GlobalObject*> global, JSProtoKey key)
 // Initialize all classes potentially created during parsing for use in parser
 // data structures, template objects, &c.
 static bool
-EnsureParserCreatedClasses(JSContext* cx)
+EnsureParserCreatedClasses(JSContext* cx, ParseTaskKind kind)
 {
     Handle<GlobalObject*> global = cx->global();
 
@@ -338,18 +385,15 @@ EnsureParserCreatedClasses(JSContext* cx)
     if (!GlobalObject::initStarGenerators(cx, global))
         return false; // needed by function*() {} and generator comprehensions
 
+    if (kind == ParseTaskKind::Module && !GlobalObject::ensureModulePrototypesCreated(cx, global))
+        return false;
+
     return true;
 }
 
-bool
-js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options,
-                              const char16_t* chars, size_t length,
-                              JS::OffThreadCompileCallback callback, void* callbackData)
+static JSObject*
+CreateGlobalForOffThreadParse(JSContext* cx, ParseTaskKind kind, const gc::AutoSuppressGC& nogc)
 {
-    // Suppress GC so that calls below do not trigger a new incremental GC
-    // which could require barriers on the atoms compartment.
-    gc::AutoSuppressGC suppress(cx);
-
     JSCompartment* currentCompartment = cx->compartment();
 
     JS::CompartmentOptions compartmentOptions(currentCompartment->creationOptions(),
@@ -367,21 +411,60 @@ js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& optio
     JSObject* global = JS_NewGlobalObject(cx, &parseTaskGlobalClass, nullptr,
                                           JS::FireOnNewGlobalHook, compartmentOptions);
     if (!global)
-        return false;
+        return nullptr;
 
     JS_SetCompartmentPrincipals(global->compartment(), currentCompartment->principals());
 
     // Initialize all classes required for parsing while still on the main
     // thread, for both the target and the new global so that prototype
     // pointers can be changed infallibly after parsing finishes.
-    if (!EnsureParserCreatedClasses(cx))
-        return false;
+    if (!EnsureParserCreatedClasses(cx, kind))
+        return nullptr;
     {
         AutoCompartment ac(cx, global);
-        if (!EnsureParserCreatedClasses(cx))
-            return false;
+        if (!EnsureParserCreatedClasses(cx, kind))
+            return nullptr;
     }
 
+    return global;
+}
+
+static bool
+QueueOffThreadParseTask(JSContext* cx, ParseTask* task)
+{
+    if (OffThreadParsingMustWaitForGC(cx->runtime())) {
+        AutoLockHelperThreadState lock;
+        if (!HelperThreadState().parseWaitingOnGC().append(task)) {
+            ReportOutOfMemory(cx);
+            return false;
+        }
+    } else {
+        AutoLockHelperThreadState lock;
+        if (!HelperThreadState().parseWorklist().append(task)) {
+            ReportOutOfMemory(cx);
+            return false;
+        }
+
+        task->activate(cx->runtime());
+        HelperThreadState().notifyOne(GlobalHelperThreadState::PRODUCER);
+    }
+
+    return true;
+}
+
+bool
+js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options,
+                              const char16_t* chars, size_t length,
+                              JS::OffThreadCompileCallback callback, void* callbackData)
+{
+    // Suppress GC so that calls below do not trigger a new incremental GC
+    // which could require barriers on the atoms compartment.
+    gc::AutoSuppressGC nogc(cx);
+
+    JSObject* global = CreateGlobalForOffThreadParse(cx, ParseTaskKind::Script, nogc);
+    if (!global)
+        return false;
+
     ScopedJSDeletePtr<ExclusiveContext> helpercx(
         cx->new_<ExclusiveContext>(cx->runtime(), (PerThreadData*) nullptr,
                                    ExclusiveContext::Context_Exclusive));
@@ -389,32 +472,50 @@ js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& optio
         return false;
 
     ScopedJSDeletePtr<ParseTask> task(
-        cx->new_<ParseTask>(helpercx.get(), global, cx, chars, length,
-                            callback, callbackData));
+        cx->new_<ScriptParseTask>(helpercx.get(), global, cx, chars, length,
+                                  callback, callbackData));
     if (!task)
         return false;
 
     helpercx.forget();
 
-    if (!task->init(cx, options))
+    if (!task->init(cx, options) || !QueueOffThreadParseTask(cx, task))
         return false;
 
-    if (OffThreadParsingMustWaitForGC(cx->runtime())) {
-        AutoLockHelperThreadState lock;
-        if (!HelperThreadState().parseWaitingOnGC().append(task.get())) {
-            ReportOutOfMemory(cx);
-            return false;
-        }
-    } else {
-        AutoLockHelperThreadState lock;
-        if (!HelperThreadState().parseWorklist().append(task.get())) {
-            ReportOutOfMemory(cx);
-            return false;
-        }
+    task.forget();
 
-        task->activate(cx->runtime());
-        HelperThreadState().notifyOne(GlobalHelperThreadState::PRODUCER);
-    }
+    return true;
+}
+
+bool
+js::StartOffThreadParseModule(JSContext* cx, const ReadOnlyCompileOptions& options,
+                              const char16_t* chars, size_t length,
+                              JS::OffThreadCompileCallback callback, void* callbackData)
+{
+    // Suppress GC so that calls below do not trigger a new incremental GC
+    // which could require barriers on the atoms compartment.
+    gc::AutoSuppressGC nogc(cx);
+
+    JSObject* global = CreateGlobalForOffThreadParse(cx, ParseTaskKind::Module, nogc);
+    if (!global)
+        return false;
+
+    ScopedJSDeletePtr<ExclusiveContext> helpercx(
+        cx->new_<ExclusiveContext>(cx->runtime(), (PerThreadData*) nullptr,
+                                   ExclusiveContext::Context_Exclusive));
+    if (!helpercx)
+        return false;
+
+    ScopedJSDeletePtr<ParseTask> task(
+        cx->new_<ModuleParseTask>(helpercx.get(), global, cx, chars, length,
+                                  callback, callbackData));
+    if (!task)
+        return false;
+
+    helpercx.forget();
+
+    if (!task->init(cx, options) || !QueueOffThreadParseTask(cx, task))
+        return false;
 
     task.forget();
 
@@ -1013,7 +1114,8 @@ LeaveParseTaskZone(JSRuntime* rt, ParseTask* task)
 }
 
 JSScript*
-GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void* token)
+GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, ParseTaskKind kind,
+                                         void* token)
 {
     ScopedJSDeletePtr<ParseTask> parseTask;
 
@@ -1031,6 +1133,7 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void
         }
     }
     MOZ_ASSERT(parseTask);
+    MOZ_ASSERT(parseTask->kind == kind);
 
     if (!maybecx) {
         LeaveParseTaskZone(rt, parseTask);
@@ -1043,7 +1146,7 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void
     // Make sure we have all the constructors we need for the prototype
     // remapping below, since we can't GC while that's happening.
     Rooted<GlobalObject*> global(cx, &cx->global()->as<GlobalObject>());
-    if (!EnsureParserCreatedClasses(cx)) {
+    if (!EnsureParserCreatedClasses(cx, kind)) {
         LeaveParseTaskZone(rt, parseTask);
         return nullptr;
     }
@@ -1089,6 +1192,34 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void
     return script;
 }
 
+JSScript*
+GlobalHelperThreadState::finishScriptParseTask(JSContext* maybecx, JSRuntime* rt, void* token)
+{
+    JSScript* script = finishParseTask(maybecx, rt, ParseTaskKind::Script, token);
+    MOZ_ASSERT_IF(script, script->isGlobalCode());
+    return script;
+}
+
+JSObject*
+GlobalHelperThreadState::finishModuleParseTask(JSContext* maybecx, JSRuntime* rt, void* token)
+{
+    JSScript* script = finishParseTask(maybecx, rt, ParseTaskKind::Module, token);
+    if (!script)
+        return nullptr;
+
+    MOZ_ASSERT(script->module());
+    if (!maybecx)
+        return nullptr;
+
+    JSContext* cx = maybecx;
+    RootedModuleObject module(cx, script->module());
+    module->fixScopesAfterCompartmentMerge(cx);
+    if (!ModuleObject::FreezeArrayProperties(cx, module))
+        return nullptr;
+
+    return module;
+}
+
 JSObject*
 GlobalObject::getStarGeneratorFunctionPrototype()
 {
@@ -1116,8 +1247,13 @@ GlobalHelperThreadState::mergeParseTaskCompartment(JSRuntime* rt, ParseTask* par
         // Generator functions don't have Function.prototype as prototype but a
         // different function object, so the IdentifyStandardPrototype trick
         // below won't work.  Just special-case it.
-        JSObject* parseTaskStarGenFunctionProto =
-            parseTask->exclusiveContextGlobal->as<GlobalObject>().getStarGeneratorFunctionPrototype();
+        GlobalObject* parseGlobal = &parseTask->exclusiveContextGlobal->as<GlobalObject>();
+        JSObject* parseTaskStarGenFunctionProto = parseGlobal->getStarGeneratorFunctionPrototype();
+
+        // Module objects don't have standard prototypes either.
+        JSObject* moduleProto = parseGlobal->maybeGetModulePrototype();
+        JSObject* importEntryProto = parseGlobal->maybeGetImportEntryPrototype();
+        JSObject* exportEntryProto = parseGlobal->maybeGetExportEntryPrototype();
 
         // Point the prototypes of any objects in the script's compartment to refer
         // to the corresponding prototype in the new compartment. This will briefly
@@ -1132,21 +1268,24 @@ GlobalHelperThreadState::mergeParseTaskCompartment(JSRuntime* rt, ParseTask* par
             JSObject* protoObj = proto.toObject();
 
             JSObject* newProto;
-            if (protoObj == parseTaskStarGenFunctionProto) {
-                newProto = global->getStarGeneratorFunctionPrototype();
-            } else {
-                JSProtoKey key = JS::IdentifyStandardPrototype(protoObj);
-                if (key == JSProto_Null)
-                    continue;
-
+            JSProtoKey key = JS::IdentifyStandardPrototype(protoObj);
+            if (key != JSProto_Null) {
                 MOZ_ASSERT(key == JSProto_Object || key == JSProto_Array ||
                            key == JSProto_Function || key == JSProto_RegExp ||
                            key == JSProto_Iterator);
-
                 newProto = GetBuiltinPrototypePure(global, key);
+            } else if (protoObj == parseTaskStarGenFunctionProto) {
+                newProto = global->getStarGeneratorFunctionPrototype();
+            } else if (protoObj == moduleProto) {
+                newProto = global->getModulePrototype();
+            } else if (protoObj == importEntryProto) {
+                newProto = global->getImportEntryPrototype();
+            } else if (protoObj == exportEntryProto) {
+                newProto = global->getExportEntryPrototype();
+            } else {
+                continue;
             }
 
-            MOZ_ASSERT(newProto);
             group->setProtoUnchecked(TaggedProto(newProto));
         }
     }
@@ -1387,25 +1526,7 @@ HelperThread::handleParseWorkload()
         AutoUnlockHelperThreadState unlock;
         PerThreadData::AutoEnterRuntime enter(threadData.ptr(),
                                               task->exclusiveContextGlobal->runtimeFromAnyThread());
-        SourceBufferHolder srcBuf(task->chars, task->length,
-                                  SourceBufferHolder::NoOwnership);
-
-        // ! WARNING WARNING WARNING !
-        //
-        // See comment in Parser::bindLexical about optimizing global lexical
-        // bindings. If we start optimizing them, passing in task->cx's
-        // global lexical scope would be incorrect!
-        //
-        // ! WARNING WARNING WARNING !
-        ExclusiveContext* parseCx = task->cx;
-        Rooted<ClonedBlockObject*> globalLexical(parseCx, &parseCx->global()->lexicalScope());
-        Rooted<StaticScope*> staticScope(parseCx, &globalLexical->staticBlock());
-        task->script = frontend::CompileScript(parseCx, &task->alloc,
-                                               globalLexical, staticScope, nullptr,
-                                               task->options, srcBuf,
-                                               /* source_ = */ nullptr,
-                                               /* extraSct = */ nullptr,
-                                               /* sourceObjectOut = */ task->sourceObject.address());
+        task->parse();
     }
 
     // The callback is invoked while we are still off the main thread.
diff --git a/js/src/vm/HelperThreads.h b/js/src/vm/HelperThreads.h
index 5eae2d21eb..76fccf8f56 100644
--- a/js/src/vm/HelperThreads.h
+++ b/js/src/vm/HelperThreads.h
@@ -37,6 +37,12 @@ namespace wasm {
   typedef Vector<IonCompileTask*, 0, SystemAllocPolicy> IonCompileTaskVector;
 } // namespace wasm
 
+enum class ParseTaskKind
+{
+    Script,
+    Module
+};
+
 // Per-process state for off thread work items.
 class GlobalHelperThreadState
 {
@@ -219,6 +225,11 @@ class GlobalHelperThreadState
         return bool(numWasmFailedJobs);
     }
 
+    JSScript* finishParseTask(JSContext* maybecx, JSRuntime* rt, ParseTaskKind kind, void* token);
+    void mergeParseTaskCompartment(JSRuntime* rt, ParseTask* parseTask,
+                                   Handle<GlobalObject*> global,
+                                   JSCompartment* dest);
+
   private:
     /*
      * Number of wasm jobs that encountered failure for the active module.
@@ -227,10 +238,8 @@ class GlobalHelperThreadState
     uint32_t numWasmFailedJobs;
 
   public:
-    JSScript* finishParseTask(JSContext* maybecx, JSRuntime* rt, void* token);
-    void mergeParseTaskCompartment(JSRuntime* rt, ParseTask* parseTask,
-                                   Handle<GlobalObject*> global,
-                                   JSCompartment* dest);
+    JSScript* finishScriptParseTask(JSContext* maybecx, JSRuntime* rt, void* token);
+    JSObject* finishModuleParseTask(JSContext* maybecx, JSRuntime* rt, void* token);
     bool compressionInProgress(SourceCompressionTask* task);
     SourceCompressionTask* compressionTaskForSource(ScriptSource* ss);
 
@@ -410,6 +419,11 @@ StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options,
                           const char16_t* chars, size_t length,
                           JS::OffThreadCompileCallback callback, void* callbackData);
 
+bool
+StartOffThreadParseModule(JSContext* cx, const ReadOnlyCompileOptions& options,
+                          const char16_t* chars, size_t length,
+                          JS::OffThreadCompileCallback callback, void* callbackData);
+
 /*
  * Called at the end of GC to enqueue any Parse tasks that were waiting on an
  * atoms-zone GC to finish.
@@ -463,6 +477,7 @@ class MOZ_RAII AutoUnlockHelperThreadState
 
 struct ParseTask
 {
+    ParseTaskKind kind;
     ExclusiveContext* cx;
     OwningCompileOptions options;
     const char16_t* chars;
@@ -490,19 +505,36 @@ struct ParseTask
     bool overRecursed;
     bool outOfMemory;
 
-    ParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+    ParseTask(ParseTaskKind kind, ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
               JSContext* initCx, const char16_t* chars, size_t length,
               JS::OffThreadCompileCallback callback, void* callbackData);
     bool init(JSContext* cx, const ReadOnlyCompileOptions& options);
 
     void activate(JSRuntime* rt);
+    virtual void parse() = 0;
     bool finish(JSContext* cx);
 
     bool runtimeMatches(JSRuntime* rt) {
         return exclusiveContextGlobal->runtimeFromAnyThread() == rt;
     }
 
-    ~ParseTask();
+    virtual ~ParseTask();
+};
+
+struct ScriptParseTask : public ParseTask
+{
+    ScriptParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+                    JSContext* initCx, const char16_t* chars, size_t length,
+                    JS::OffThreadCompileCallback callback, void* callbackData);
+    void parse() override;
+};
+
+struct ModuleParseTask : public ParseTask
+{
+    ModuleParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal,
+                    JSContext* initCx, const char16_t* chars, size_t length,
+                    JS::OffThreadCompileCallback callback, void* callbackData);
+    void parse() override;
 };
 
 // Return whether, if a new parse task was started, it would need to wait for
diff --git a/js/src/vm/NativeObject.cpp b/js/src/vm/NativeObject.cpp
index b2c01e91cf..06b1d6b70d 100644
--- a/js/src/vm/NativeObject.cpp
+++ b/js/src/vm/NativeObject.cpp
@@ -1345,6 +1345,11 @@ js::NativeDefineProperty(ExclusiveContext* cx, HandleNativeObject obj, HandleId
             if ((desc_.attributes() & JSPROP_RESOLVING) == 0)
                 obj->as<ArgumentsObject>().markLengthOverridden();
         }
+        if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) {
+            // Do same thing as .length for [@@iterator].
+            if ((desc_.attributes() & JSPROP_RESOLVING) == 0)
+                obj->as<ArgumentsObject>().markIteratorOverridden();
+        }
     }
 
     // 9.1.6.1 OrdinaryDefineOwnProperty steps 1-2.
diff --git a/js/src/vm/SavedStacks.cpp b/js/src/vm/SavedStacks.cpp
index 636a8c0046..0937c14c9f 100644
--- a/js/src/vm/SavedStacks.cpp
+++ b/js/src/vm/SavedStacks.cpp
@@ -1028,10 +1028,10 @@ SavedStacks::copyAsyncStack(JSContext* cx, HandleObject asyncStack, HandleString
 }
 
 void
-SavedStacks::sweep(JSRuntime* rt)
+SavedStacks::sweep()
 {
     frames.sweep();
-    sweepPCLocationMap();
+    pcLocationMap.sweep();
 }
 
 void
@@ -1303,24 +1303,6 @@ SavedStacks::createFrameFromLookup(JSContext* cx, SavedFrame::HandleLookup looku
     return frame;
 }
 
-/*
- * Remove entries from the table whose JSScript is being collected.
- */
-void
-SavedStacks::sweepPCLocationMap()
-{
-    for (PCLocationMap::Enum e(pcLocationMap); !e.empty(); e.popFront()) {
-        PCKey key = e.front().key();
-        JSScript* script = key.script.get();
-        if (IsAboutToBeFinalizedUnbarriered(&script)) {
-            e.removeFront();
-        } else if (script != key.script.get()) {
-            key.script = script;
-            e.rekeyFront(key);
-        }
-    }
-}
-
 bool
 SavedStacks::getLocation(JSContext* cx, const FrameIter& iter,
                          MutableHandle<LocationValue> locationp)
diff --git a/js/src/vm/SavedStacks.h b/js/src/vm/SavedStacks.h
index 2abd9a383f..eae6b91fb1 100644
--- a/js/src/vm/SavedStacks.h
+++ b/js/src/vm/SavedStacks.h
@@ -167,7 +167,7 @@ class SavedStacks {
     bool     saveCurrentStack(JSContext* cx, MutableHandleSavedFrame frame, unsigned maxFrameCount = 0);
     bool     copyAsyncStack(JSContext* cx, HandleObject asyncStack, HandleString asyncCause,
                             MutableHandleSavedFrame adoptedStack, unsigned maxFrameCount = 0);
-    void     sweep(JSRuntime* rt);
+    void     sweep();
     void     trace(JSTracer* trc);
     uint32_t count();
     void     clear();
@@ -220,28 +220,32 @@ class SavedStacks {
     struct PCKey {
         PCKey(JSScript* script, jsbytecode* pc) : script(script), pc(pc) { }
 
-        PreBarrieredScript script;
-        jsbytecode*        pc;
+        RelocatablePtrScript script;
+        jsbytecode* pc;
+
+        bool needsSweep() { return IsAboutToBeFinalized(&script); }
     };
 
   public:
     struct LocationValue {
         LocationValue() : source(nullptr), line(0), column(0) { }
         LocationValue(JSAtom* source, size_t line, uint32_t column)
-            : source(source),
-              line(line),
-              column(column)
+            : source(source), line(line), column(column)
         { }
 
-        static void trace(LocationValue* self, JSTracer* trc) { self->trace(trc); }
         void trace(JSTracer* trc) {
             if (source)
                 TraceEdge(trc, &source, "SavedStacks::LocationValue::source");
         }
 
-        PreBarrieredAtom source;
-        size_t           line;
-        uint32_t         column;
+        bool needsSweep() {
+            MOZ_ASSERT(source);
+            return !IsAboutToBeFinalized(&source);
+        }
+
+        RelocatablePtrAtom source;
+        size_t line;
+        uint32_t column;
     };
 
     template <typename Outer>
@@ -264,8 +268,8 @@ class SavedStacks {
 
   private:
     struct PCLocationHasher : public DefaultHasher<PCKey> {
-        typedef PointerHasher<JSScript*, 3>   ScriptPtrHasher;
-        typedef PointerHasher<jsbytecode*, 3> BytecodePtrHasher;
+        using ScriptPtrHasher = DefaultHasher<JSScript*>;
+        using BytecodePtrHasher = DefaultHasher<jsbytecode*>;
 
         static HashNumber hash(const PCKey& key) {
             return mozilla::AddToHash(ScriptPtrHasher::hash(key.script),
@@ -273,15 +277,14 @@ class SavedStacks {
         }
 
         static bool match(const PCKey& l, const PCKey& k) {
-            return l.script == k.script && l.pc == k.pc;
+            return ScriptPtrHasher::match(l.script, k.script) &&
+                   BytecodePtrHasher::match(l.pc, k.pc);
         }
     };
 
-    typedef HashMap<PCKey, LocationValue, PCLocationHasher, SystemAllocPolicy> PCLocationMap;
-
+    using PCLocationMap = GCHashMap<PCKey, LocationValue, PCLocationHasher, SystemAllocPolicy>;
     PCLocationMap pcLocationMap;
 
-    void sweepPCLocationMap();
     bool getLocation(JSContext* cx, const FrameIter& iter, MutableHandle<LocationValue> locationp);
 };
 
diff --git a/js/src/vm/ScopeObject.cpp b/js/src/vm/ScopeObject.cpp
index 7113746c2c..073dff8d4b 100644
--- a/js/src/vm/ScopeObject.cpp
+++ b/js/src/vm/ScopeObject.cpp
@@ -1479,6 +1479,13 @@ ScopeIter::settle()
     if (frame_ && frame_.isFunctionFrame() && frame_.callee()->needsCallObject() &&
         !frame_.hasCallObj())
     {
+        // At the very start of a script that starts with a lexical block, the
+        // static scope will be that block. Skip it if this is the case.
+        if (ssi_.type() == StaticScopeIter<CanGC>::Block)
+            incrementStaticScopeIter();
+
+        // We should now be at the function scope, so since we have no
+        // CallObject, skip that too.
         MOZ_ASSERT(ssi_.type() == StaticScopeIter<CanGC>::Function);
         incrementStaticScopeIter();
     }
@@ -1486,8 +1493,14 @@ ScopeIter::settle()
     // Check for trying to iterate a strict eval frame before the prologue has
     // created the CallObject.
     if (frame_ && frame_.isStrictEvalFrame() && !frame_.hasCallObj() && !ssi_.done()) {
+        // Eval frames always have their own lexical scope. Analogous to the
+        // function frame case above, if the script starts with a lexical
+        // block, the SSI could see 2 block scopes here. So skip between 1-2
+        // static block scopes here.
         MOZ_ASSERT(ssi_.type() == StaticScopeIter<CanGC>::Block);
         incrementStaticScopeIter();
+        if (ssi_.type() == StaticScopeIter<CanGC>::Block)
+            incrementStaticScopeIter();
         MOZ_ASSERT(ssi_.type() == StaticScopeIter<CanGC>::Eval);
         MOZ_ASSERT(maybeStaticScope() == frame_.script()->enclosingStaticScope());
         incrementStaticScopeIter();
diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp
index 93d0fd7720..d2222c68ef 100644
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -780,6 +780,64 @@ intrinsic_ArrayBufferCopyData(JSContext* cx, unsigned argc, Value* vp)
     return true;
 }
 
+static bool
+intrinsic_IsSpecificTypedArray(JSContext* cx, unsigned argc, Value* vp, Scalar::Type type)
+{
+    CallArgs args = CallArgsFromVp(argc, vp);
+    MOZ_ASSERT(args.length() == 1);
+    MOZ_ASSERT(args[0].isObject());
+
+    JSObject* obj = &args[0].toObject();
+
+    bool isArray = JS_GetArrayBufferViewType(obj) == type;
+
+    args.rval().setBoolean(isArray);
+    return true;
+}
+
+static bool
+intrinsic_IsUint8TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint8) ||
+           intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint8Clamped);
+}
+
+static bool
+intrinsic_IsInt8TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int8);
+}
+
+static bool
+intrinsic_IsUint16TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint16);
+}
+
+static bool
+intrinsic_IsInt16TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int16);
+}
+
+static bool
+intrinsic_IsUint32TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint32);
+}
+
+static bool
+intrinsic_IsInt32TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int32);
+}
+
+static bool
+intrinsic_IsFloat32TypedArray(JSContext* cx, unsigned argc, Value* vp)
+{
+    return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Float32);
+}
+
 static bool
 intrinsic_IsPossiblyWrappedTypedArray(JSContext* cx, unsigned argc, Value* vp)
 {
@@ -1327,6 +1385,34 @@ CallNonGenericSelfhostedMethod(JSContext* cx, unsigned argc, Value* vp)
     return CallNonGenericMethod<Test, CallSelfHostedNonGenericMethod>(cx, args);
 }
 
+bool
+js::IsCallSelfHostedNonGenericMethod(NativeImpl impl)
+{
+    return impl == CallSelfHostedNonGenericMethod;
+}
+
+bool
+js::ReportIncompatibleSelfHostedMethod(JSContext* cx, const CallArgs& args)
+{
+    // The contract for this function is the same as CallSelfHostedNonGenericMethod.
+    // The normal ReportIncompatible function doesn't work for selfhosted functions,
+    // because they always call the different CallXXXMethodIfWrapped methods,
+    // which would be reported as the called function instead.
+
+    // Lookup the selfhosted method that was invoked.
+    ScriptFrameIter iter(cx);
+    MOZ_ASSERT(iter.isFunctionFrame());
+
+    JSAutoByteString funNameBytes;
+    if (const char* funName = GetFunctionNameBytes(cx, iter.callee(cx), &funNameBytes)) {
+        JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_METHOD,
+                             funName, "method", InformalValueTypeName(args.thisv()));
+    }
+
+    return false;
+}
+
+
 /**
  * Returns the default locale as a well-formed, but not necessarily canonicalized,
  * BCP-47 language tag.
@@ -1768,6 +1854,13 @@ static const JSFunctionSpec intrinsic_functions[] = {
     JS_FN("ArrayBufferByteLength",   intrinsic_ArrayBufferByteLength,   1,0),
     JS_FN("ArrayBufferCopyData",     intrinsic_ArrayBufferCopyData,     4,0),
 
+    JS_FN("IsUint8TypedArray",        intrinsic_IsUint8TypedArray,      1,0),
+    JS_FN("IsInt8TypedArray",         intrinsic_IsInt8TypedArray,       1,0),
+    JS_FN("IsUint16TypedArray",       intrinsic_IsUint16TypedArray,     1,0),
+    JS_FN("IsInt16TypedArray",        intrinsic_IsInt16TypedArray,      1,0),
+    JS_FN("IsUint32TypedArray",       intrinsic_IsUint32TypedArray,     1,0),
+    JS_FN("IsInt32TypedArray",        intrinsic_IsInt32TypedArray,      1,0),
+    JS_FN("IsFloat32TypedArray",      intrinsic_IsFloat32TypedArray,    1,0),
     JS_INLINABLE_FN("IsTypedArray",
                     intrinsic_IsInstanceOfBuiltin<TypedArrayObject>,    1,0,
                     IntrinsicIsTypedArray),
diff --git a/js/src/vm/SelfHosting.h b/js/src/vm/SelfHosting.h
index dcd18708a5..7e2055469d 100644
--- a/js/src/vm/SelfHosting.h
+++ b/js/src/vm/SelfHosting.h
@@ -8,6 +8,7 @@
 #define vm_SelfHosting_h_
 
 #include "jsapi.h"
+#include "NamespaceImports.h"
 
 class JSAtom;
 
@@ -17,7 +18,14 @@ namespace js {
  * Check whether the given JSFunction is a self-hosted function whose
  * self-hosted name is the given name.
  */
-bool IsSelfHostedFunctionWithName(JSFunction* fun, JSAtom* name);
+bool
+IsSelfHostedFunctionWithName(JSFunction* fun, JSAtom* name);
+
+bool
+IsCallSelfHostedNonGenericMethod(NativeImpl impl);
+
+bool
+ReportIncompatibleSelfHostedMethod(JSContext* cx, const CallArgs& args);
 
 /* Get the compile options used when compiling self hosted code. */
 void
diff --git a/js/src/vm/SharedArrayObject.cpp b/js/src/vm/SharedArrayObject.cpp
index 9dfcf13aef..afdb739350 100644
--- a/js/src/vm/SharedArrayObject.cpp
+++ b/js/src/vm/SharedArrayObject.cpp
@@ -164,15 +164,15 @@ SharedArrayRawBuffer::New(JSContext* cx, uint32_t length)
 void
 SharedArrayRawBuffer::addReference()
 {
-    MOZ_ASSERT(this->refcount > 0);
-    ++this->refcount; // Atomic.
+    MOZ_ASSERT(this->refcount_ > 0);
+    ++this->refcount_; // Atomic.
 }
 
 void
 SharedArrayRawBuffer::dropReference()
 {
     // Drop the reference to the buffer.
-    uint32_t refcount = --this->refcount; // Atomic.
+    uint32_t refcount = --this->refcount_; // Atomic.
 
     // If this was the final reference, release the buffer.
     if (refcount == 0) {
@@ -330,7 +330,15 @@ SharedArrayBufferObject::Finalize(FreeOp* fop, JSObject* obj)
 SharedArrayBufferObject::addSizeOfExcludingThis(JSObject* obj, mozilla::MallocSizeOf mallocSizeOf,
                                                 JS::ClassInfo* info)
 {
-    info->objectsNonHeapElementsMapped += obj->as<SharedArrayBufferObject>().byteLength();
+    // Divide the buffer size by the refcount to get the fraction of the buffer
+    // owned by this thread. It's conceivable that the refcount might change in
+    // the middle of memory reporting, in which case the amount reported for
+    // some threads might be to high (if the refcount goes up) or too low (if
+    // the refcount goes down). But that's unlikely and hard to avoid, so we
+    // just live with the risk.
+    const SharedArrayBufferObject& buf = obj->as<SharedArrayBufferObject>();
+    info->objectsNonHeapElementsShared +=
+        buf.byteLength() / buf.rawBufferObject()->refcount();
 }
 
 const Class SharedArrayBufferObject::protoClass = {
diff --git a/js/src/vm/SharedArrayObject.h b/js/src/vm/SharedArrayObject.h
index a2db1040a5..7e7b15f96d 100644
--- a/js/src/vm/SharedArrayObject.h
+++ b/js/src/vm/SharedArrayObject.h
@@ -44,7 +44,7 @@ class FutexWaiter;
 class SharedArrayRawBuffer
 {
   private:
-    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> refcount;
+    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> refcount_;
     uint32_t length;
 
     // A list of structures representing tasks waiting on some
@@ -53,7 +53,7 @@ class SharedArrayRawBuffer
 
   protected:
     SharedArrayRawBuffer(uint8_t* buffer, uint32_t length)
-      : refcount(1),
+      : refcount_(1),
         length(length),
         waiters_(nullptr)
     {
@@ -84,6 +84,8 @@ class SharedArrayRawBuffer
         return length;
     }
 
+    uint32_t refcount() const { return refcount_; }
+
     void addReference();
     void dropReference();
 };
diff --git a/js/src/vm/Stack.cpp b/js/src/vm/Stack.cpp
index cf06be729b..727e24176b 100644
--- a/js/src/vm/Stack.cpp
+++ b/js/src/vm/Stack.cpp
@@ -18,6 +18,7 @@
 #include "js/GCAPI.h"
 #include "vm/Debugger.h"
 #include "vm/Opcodes.h"
+#include "vm/ScopeObject.h"
 
 #include "jit/JitFrameIterator-inl.h"
 #include "vm/Interpreter-inl.h"
diff --git a/js/xpconnect/src/XPCJSRuntime.cpp b/js/xpconnect/src/XPCJSRuntime.cpp
index 94d19cfac6..1395ee89ad 100644
--- a/js/xpconnect/src/XPCJSRuntime.cpp
+++ b/js/xpconnect/src/XPCJSRuntime.cpp
@@ -2166,10 +2166,10 @@ ReportClassStats(const ClassInfo& classInfo, const nsACString& path,
             "Non-fixed object slots.");
     }
 
-    if (classInfo.objectsMallocHeapElementsNonAsmJS > 0) {
-        REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/malloc-heap/elements/non-asm.js"),
-            KIND_HEAP, classInfo.objectsMallocHeapElementsNonAsmJS,
-            "Non-asm.js indexed elements.");
+    if (classInfo.objectsMallocHeapElementsNormal > 0) {
+        REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/malloc-heap/elements/normal"),
+            KIND_HEAP, classInfo.objectsMallocHeapElementsNormal,
+            "Normal (non-asm.js) indexed elements.");
     }
 
     // asm.js arrays are heap-allocated on some platforms and
@@ -2192,10 +2192,18 @@ ReportClassStats(const ClassInfo& classInfo, const nsACString& path,
             "the GC heap.");
     }
 
-    if (classInfo.objectsNonHeapElementsMapped > 0) {
-        REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/mapped"),
-            KIND_NONHEAP, classInfo.objectsNonHeapElementsMapped,
-            "Memory-mapped array buffer elements.");
+    if (classInfo.objectsNonHeapElementsNormal > 0) {
+        REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/normal"),
+            KIND_NONHEAP, classInfo.objectsNonHeapElementsNormal,
+            "Memory-mapped non-shared array buffer elements.");
+    }
+
+    if (classInfo.objectsNonHeapElementsShared > 0) {
+        REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/shared"),
+            KIND_NONHEAP, classInfo.objectsNonHeapElementsShared,
+            "Memory-mapped shared array buffer elements. These elements are "
+            "shared between one or more runtimes; the reported size is divided "
+            "by the buffer's refcount.");
     }
 
     if (classInfo.objectsNonHeapCodeAsmJS > 0) {
diff --git a/layout/base/FramePropertyTable.cpp b/layout/base/FramePropertyTable.cpp
index 12a1dcc3de..0fd9b1c377 100644
--- a/layout/base/FramePropertyTable.cpp
+++ b/layout/base/FramePropertyTable.cpp
@@ -123,7 +123,10 @@ FramePropertyTable::RemoveInternal(
   if (entry->mProp.mProperty == aProperty) {
     // There's only one entry and it's the one we want
     void* value = entry->mProp.mValue;
-    mEntries.RawRemoveEntry(entry);
+
+    // Here it's ok to use RemoveEntry() -- which may resize mEntries --
+    // because we null mLastEntry at the same time.
+    mEntries.RemoveEntry(entry);
     mLastEntry = nullptr;
     if (aFoundResult) {
       *aFoundResult = true;
@@ -209,6 +212,9 @@ FramePropertyTable::DeleteAllFor(const nsIFrame* aFrame)
   }
 
   DeleteAllForEntry(entry);
+
+  // mLastEntry points into mEntries, so we use RawRemoveEntry() which will not
+  // resize mEntries.
   mEntries.RawRemoveEntry(entry);
 }
 
diff --git a/layout/base/FramePropertyTable.h b/layout/base/FramePropertyTable.h
index 9688a8f159..8c0a06c1f6 100644
--- a/layout/base/FramePropertyTable.h
+++ b/layout/base/FramePropertyTable.h
@@ -347,6 +347,9 @@ protected:
 
   static void DeleteAllForEntry(Entry* aEntry);
 
+  // Note that mLastEntry points into mEntries, so we need to be careful about
+  // not triggering a resize of mEntries, e.g. use RawRemoveEntry() instead of
+  // RemoveEntry() in some places.
   nsTHashtable<Entry> mEntries;
   const nsIFrame* mLastFrame;
   Entry* mLastEntry;
diff --git a/layout/inspector/inDOMUtils.cpp b/layout/inspector/inDOMUtils.cpp
index 6dc81e5825..bb0c1d6538 100644
--- a/layout/inspector/inDOMUtils.cpp
+++ b/layout/inspector/inDOMUtils.cpp
@@ -1194,8 +1194,9 @@ inDOMUtils::GetCSSPseudoElementNames(uint32_t* aLength, char16_t*** aNames)
 {
   nsTArray<nsIAtom*> array;
 
-  const uint8_t pseudoCount = static_cast<uint8_t>(CSSPseudoElementType::Count);
-  for (uint8_t i = 0; i < pseudoCount; ++i) {
+  const CSSPseudoElementTypeBase pseudoCount =
+    static_cast<CSSPseudoElementTypeBase>(CSSPseudoElementType::Count);
+  for (CSSPseudoElementTypeBase i = 0; i < pseudoCount; ++i) {
     CSSPseudoElementType type = static_cast<CSSPseudoElementType>(i);
     if (!nsCSSPseudoElements::PseudoElementIsUASheetOnly(type)) {
       nsIAtom* atom = nsCSSPseudoElements::GetPseudoAtom(type);
diff --git a/layout/media/symbols.def.in b/layout/media/symbols.def.in
index b50fda2135..a828b4048e 100644
--- a/layout/media/symbols.def.in
+++ b/layout/media/symbols.def.in
@@ -664,6 +664,7 @@ BrotliDecoderDecompress
 BrotliDecoderStateInit
 BrotliDecoderStateCleanup
 BrotliDecoderDecompressStream
+BrotliDecoderIsFinished
 #ifdef MOZ_WEBRTC
 downmix_int
 #ifdef MOZ_SAMPLE_TYPE_FLOAT32
diff --git a/layout/reftests/bidi/613157-2-ref.html b/layout/reftests/bidi/613157-2-ref.html
index e5c64211ef..a41becbea8 100644
--- a/layout/reftests/bidi/613157-2-ref.html
+++ b/layout/reftests/bidi/613157-2-ref.html
@@ -5,6 +5,6 @@
   <title>Inline blocks shouldn't end the paragraph</title>
  </head>
  <body>
-  <p>&#x202e;אני אוהב--&gt; 4 xoferiF--&gt;8 ימים בשבוע</p>
+  <p>&#x202e;ימים בשבוע 8&lt;-- 4 xoferiF&lt;--אני אוהב</p>
  </body>
 </html>
diff --git a/layout/reftests/bidi/698706-1-ref.html b/layout/reftests/bidi/698706-1-ref.html
index cef33b4c03..a5fb48020e 100644
--- a/layout/reftests/bidi/698706-1-ref.html
+++ b/layout/reftests/bidi/698706-1-ref.html
@@ -5,6 +5,6 @@
    <title>Facebook</title>
  </head>
  <body dir="rtl">
-   <div style="font-size: 18pt; letter-spacing: 2pt;">Winnie the Pooh commented on his own קישור on Christopher Robin's wall: "Lorem ipsum dolor sit amet"</div>
+   <div style="font-size: 18pt; letter-spacing: 2pt;"><bdi>Winnie the Pooh</bdi> commented on his own קישור on <bdi>Christopher Robin</bdi>'s <bdi>wall</bdi>: "<bdi>Lorem ipsum dolor sit amet</bdi>"</div>
  </body>
 </html>
diff --git a/layout/reftests/bidi/83958-2c.html b/layout/reftests/bidi/83958-2c.html
deleted file mode 100644
index 91675b7fba..0000000000
--- a/layout/reftests/bidi/83958-2c.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
-  <meta charset="UTF-8">
-  <title>Bidirectional Text Test 2 - HTML</title>
-  <style>
-p { font-family: monospace; text-align: left; }
-.embed { unicode-bidi: embed; }
-.override { unicode-bidi: bidi-override; }
-.rtl { direction: rtl; }
-.ltr { direction: ltr; }
-  </style>
- </head>
-<!-- Testcases based on http://dbaron.org/css/test/bidi2_html by L. David Baron. -->
- <body>
-  <p>&#x05D0;&#x05D1;&#x05D2;</p>
-  <p class="embed">&#x05D0;&#x05D1;&#x05D2;</p>
-  <p class="override">&#x05D0;&#x05D1;&#x05D2;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; ABC &#x05D3;&#x05D4;&#x05D5;</p>
-  <p class="rtl">&#x05D0;&#x05D1;&#x05D2; ABC &#x05D3;&#x05D4;&#x05D5;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; ABC &#x05D3;&#x05D4;&#x05D5; DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <span>ABC &#x05D3;&#x05D4;&#x05D5;</span> DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <span dir="ltr">ABC &#x05D3;&#x05D4;&#x05D5; DEF</span> &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <span dir="rtl">ABC &#x05D3;&#x05D4;&#x05D5; DEF</span> &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <span dir="rtl">ABC &#x05D3;&#x05D4;&#x05D5;</span> DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <bdo dir="rtl">ABC &#x05D3;&#x05D4;&#x05D5;</bdo> DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <bdo dir="ltr">ABC &#x05D3;&#x05D4;&#x05D5;</bdo> DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p class="rtl">&#x05D0;&#x05D1;&#x05D2; <bdo dir="ltr">ABC &#x05D3;&#x05D4;&#x05D5;</bdo> DEF &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; ABC &#x05D3;&#x05D4;&#x05D5; DEF GHI &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; <bdo dir="rtl">ABC &#x05D3;&#x05D4;&#x05D5; DEF</bdo> GHI &#x05D6;&#x05D7;&#x05D8;</p>
-  <p>&#x05D0;&#x05D1;&#x05D2; ABC <bdo dir="rtl">&#x05D3;&#x05D4;&#x05D5; DEF</bdo> GHI &#x05D6;&#x05D7;&#x05D8;</p>
- </body>
-</html>
diff --git a/layout/reftests/bidi/reftest.list b/layout/reftests/bidi/reftest.list
index ed3053da35..f467c61829 100644
--- a/layout/reftests/bidi/reftest.list
+++ b/layout/reftests/bidi/reftest.list
@@ -52,7 +52,6 @@ random-if(cocoaWidget) == with-first-letter-2b.html with-first-letter-2-ref.html
 == 83958-1c.html 83958-1-ref.html
 fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2a.html 83958-2-ref.html
 fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2b.html 83958-2-ref.html
-fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2c.html 83958-2-ref.html
 == 115921-1.html 115921-1-ref.html
 == 115921-2.html 115921-2-ref.html
 == 151407-1.html 151407-1-ref.html
@@ -74,7 +73,7 @@ fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azur
 == 263359-1a.html 263359-1-ref.html
 != 263359-1b.html 263359-1-ref.html
 == 263359-2.html 263359-2-ref.html
-== 263359-3.html 263359-3-ref.html
+fails == 263359-3.html 263359-3-ref.html # bug 1230455
 == 263359-4.html 263359-4-ref.html
 random-if(winWidget) == 267459-1.html 267459-1-ref.html # depends on windows version, see bug 590101
 == 267459-2.html 267459-2-ref.html
diff --git a/layout/style/StyleRule.cpp b/layout/style/StyleRule.cpp
index d3f2c12ee4..a04a7bdaac 100644
--- a/layout/style/StyleRule.cpp
+++ b/layout/style/StyleRule.cpp
@@ -314,11 +314,9 @@ nsCSSSelector::nsCSSSelector(void)
     mNext(nullptr),
     mNameSpace(kNameSpaceID_Unknown),
     mOperator(0),
-    mPseudoType(static_cast<int16_t>(CSSPseudoElementType::NotPseudo))
+    mPseudoType(CSSPseudoElementType::NotPseudo)
 {
   MOZ_COUNT_CTOR(nsCSSSelector);
-  static_assert(static_cast<int16_t>(CSSPseudoElementType::MAX) < INT16_MAX,
-                "CSSPseudoElementType values overflow mPseudoType");
 }
 
 nsCSSSelector*
@@ -333,7 +331,7 @@ nsCSSSelector::Clone(bool aDeepNext, bool aDeepNegations) const
   result->mCasedTag = mCasedTag;
   result->mOperator = mOperator;
   result->mPseudoType = mPseudoType;
-  
+
   NS_IF_CLONE(mIDList);
   NS_IF_CLONE(mClassList);
   NS_IF_CLONE(mPseudoClassList);
diff --git a/layout/style/StyleRule.h b/layout/style/StyleRule.h
index 4fb1bf8e3d..e0144f4f0a 100644
--- a/layout/style/StyleRule.h
+++ b/layout/style/StyleRule.h
@@ -25,6 +25,7 @@ class nsIAtom;
 struct nsCSSSelectorList;
 
 namespace mozilla {
+enum class CSSPseudoElementType : uint8_t;
 class CSSStyleSheet;
 } // namespace mozilla
 
@@ -208,11 +209,9 @@ private:
 
 public:
   // Get and set the selector's pseudo type
-  mozilla::CSSPseudoElementType PseudoType() const {
-    return static_cast<mozilla::CSSPseudoElementType>(mPseudoType);
-  }
+  mozilla::CSSPseudoElementType PseudoType() const { return mPseudoType; }
   void SetPseudoType(mozilla::CSSPseudoElementType aType) {
-    mPseudoType = static_cast<int16_t>(aType);
+    mPseudoType = aType;
   }
 
   size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
@@ -233,8 +232,9 @@ public:
   int32_t         mNameSpace;
   char16_t       mOperator;
 private:
-  // int16_t to make sure it packs well with mOperator
-  int16_t        mPseudoType;
+  // The underlying type of CSSPseudoElementType is uint8_t and
+  // it packs well with mOperator. (char16_t + uint8_t is less than 32bits.)
+  mozilla::CSSPseudoElementType mPseudoType;
 
   nsCSSSelector(const nsCSSSelector& aCopy) = delete;
   nsCSSSelector& operator=(const nsCSSSelector& aCopy) = delete;
diff --git a/layout/style/html.css b/layout/style/html.css
index 16f39c847b..0f3bdc6986 100644
--- a/layout/style/html.css
+++ b/layout/style/html.css
@@ -7,13 +7,14 @@
 
 /* bidi */
 
+[dir] {
+  unicode-bidi: -moz-isolate;
+}
 [dir="rtl"] {
   direction: rtl;
-  unicode-bidi: embed;
 }
 [dir="ltr"] {
   direction: ltr;
-  unicode-bidi: embed;
 }
 
 bdi:-moz-dir(ltr), [dir="auto"]:-moz-dir(ltr) { direction: ltr; }
@@ -30,62 +31,62 @@ bdi:-moz-dir(rtl), [dir="auto"]:-moz-dir(rtl) { direction: rtl; }
  * and the rules in http://dev.w3.org/html5/spec/rendering.html#rendering
  */
 
-address, address[dir],
-article, article[dir],
-aside, aside[dir],
-blockquote, blockquote[dir],
-body, body[dir],
-caption, caption[dir],
-center, center[dir],
-col, col[dir],
-colgroup, colgroup[dir],
-dd, dd[dir],
-dir, dir[dir],
-div, div[dir],
-dl, dl[dir],
-dt, dt[dir],
-fieldset, fieldset[dir],
-figcaption, figcaption[dir],
-figure, figure[dir],
-footer, footer[dir],
-form, form[dir],
-h1, h1[dir],
-h2, h2[dir],
-h3, h3[dir],
-h4, h4[dir],
-h5, h5[dir],
-h6, h6[dir],
-header, header[dir],
-hgroup, hgroup[dir],
-hr, hr[dir],
-html, html[dir],
-legend, legend[dir],
-li, li[dir],
-listing, listing[dir],
-main, main[dir],
-marquee, marquee[dir],
-menu, menu[dir],
-nav, nav[dir],
-noframes, noframes[dir],
-ol, ol[dir],
-p, p[dir],
-plaintext, plaintext[dir],
-pre, pre[dir],
-section, section[dir],
-summary, summary[dir],
-table, table[dir],
-tbody, tbody[dir],
-td, td[dir],
-tfoot, tfoot[dir],
-th, th[dir],
-thead, thead[dir],
-tr, tr[dir],
-ul, ul[dir],
-xmp, xmp[dir] {
+address,
+article,
+aside,
+blockquote,
+body,
+caption,
+center,
+col,
+colgroup,
+dd,
+dir,
+div,
+dl,
+dt,
+fieldset,
+figcaption,
+figure,
+footer,
+form,
+h1,
+h2,
+h3,
+h4,
+h5,
+h6,
+header,
+hgroup,
+hr,
+html,
+legend,
+li,
+listing,
+main,
+marquee,
+menu,
+nav,
+noframes,
+ol,
+p,
+plaintext,
+pre,
+section,
+summary,
+table,
+tbody,
+td,
+tfoot,
+th,
+thead,
+tr,
+ul,
+xmp {
   unicode-bidi: -moz-isolate;
 }
 
-bdi, bdi[dir], output, output[dir], [dir="auto"] { 
+bdi, output {
   unicode-bidi: -moz-isolate;
 }
 bdo, bdo[dir] {
@@ -724,7 +725,7 @@ area {
   display: none ! important;
 }
 
-iframe:-moz-full-screen {
+iframe:fullscreen {
   /* iframes in full-screen mode don't show a border. */
   border: none !important;
   padding: 0 !important;
diff --git a/layout/style/nsCSSPseudoClassList.h b/layout/style/nsCSSPseudoClassList.h
index ed8650b4f1..5a73f71b82 100644
--- a/layout/style/nsCSSPseudoClassList.h
+++ b/layout/style/nsCSSPseudoClassList.h
@@ -163,6 +163,7 @@ CSS_STATE_PSEUDO_CLASS(mozDevtoolsHighlighted, ":-moz-devtools-highlighted", 0,
 
 // Matches the element which is being displayed full-screen, and
 // any containing frames.
+CSS_STATE_PSEUDO_CLASS(fullscreen, ":fullscreen", 0, "", NS_EVENT_STATE_FULL_SCREEN)
 CSS_STATE_PSEUDO_CLASS(mozFullScreen, ":-moz-full-screen", 0, "", NS_EVENT_STATE_FULL_SCREEN)
 
 // Matches any element which is an ancestor of the DOM full-screen element,
diff --git a/layout/style/nsCSSPseudoElements.cpp b/layout/style/nsCSSPseudoElements.cpp
index 7f5d85e39b..3323c5412c 100644
--- a/layout/style/nsCSSPseudoElements.cpp
+++ b/layout/style/nsCSSPseudoElements.cpp
@@ -76,7 +76,9 @@ nsCSSPseudoElements::IsCSS2PseudoElement(nsIAtom *aAtom)
 /* static */ CSSPseudoElementType
 nsCSSPseudoElements::GetPseudoType(nsIAtom *aAtom)
 {
-  for (uint8_t i = 0; i < ArrayLength(CSSPseudoElements_info); ++i) {
+  for (CSSPseudoElementTypeBase i = 0;
+       i < ArrayLength(CSSPseudoElements_info);
+       ++i) {
     if (*CSSPseudoElements_info[i].mAtom == aAtom) {
       return static_cast<Type>(i);
     }
@@ -99,13 +101,14 @@ nsCSSPseudoElements::GetPseudoType(nsIAtom *aAtom)
 nsCSSPseudoElements::GetPseudoAtom(Type aType)
 {
   NS_ASSERTION(aType < Type::Count, "Unexpected type");
-  return *CSSPseudoElements_info[static_cast<uint8_t>(aType)].mAtom;
+  return *CSSPseudoElements_info[
+    static_cast<CSSPseudoElementTypeBase>(aType)].mAtom;
 }
 
 /* static */ uint32_t
 nsCSSPseudoElements::FlagsForPseudoElement(const Type aType)
 {
-  uint8_t index = static_cast<uint8_t>(aType);
+  CSSPseudoElementTypeBase index = static_cast<CSSPseudoElementTypeBase>(aType);
   NS_ASSERTION(index < ArrayLength(CSSPseudoElements_flags),
                "argument must be a pseudo-element");
   return CSSPseudoElements_flags[index];
diff --git a/layout/style/nsCSSPseudoElements.h b/layout/style/nsCSSPseudoElements.h
index 8d7ec5c6cf..a3f7c16aa7 100644
--- a/layout/style/nsCSSPseudoElements.h
+++ b/layout/style/nsCSSPseudoElements.h
@@ -39,7 +39,8 @@ namespace mozilla {
 
 // The total count of CSSPseudoElement is less than 256,
 // so use uint8_t as its underlying type.
-enum class CSSPseudoElementType : uint8_t {
+typedef uint8_t CSSPseudoElementTypeBase;
+enum class CSSPseudoElementType : CSSPseudoElementTypeBase {
   // If the actual pseudo-elements stop being first here, change
   // GetPseudoType.
 #define CSS_PSEUDO_ELEMENT(_name, _value_, _flags) \
diff --git a/layout/style/nsCSSRuleProcessor.cpp b/layout/style/nsCSSRuleProcessor.cpp
index c167e22b05..1401299ab7 100644
--- a/layout/style/nsCSSRuleProcessor.cpp
+++ b/layout/style/nsCSSRuleProcessor.cpp
@@ -910,8 +910,8 @@ struct RuleCascadeData {
   size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
 
   RuleHash                 mRuleHash;
-  RuleHash*
-    mPseudoElementRuleHashes[static_cast<uint8_t>(CSSPseudoElementType::Count)];
+  RuleHash*                mPseudoElementRuleHashes[
+    static_cast<CSSPseudoElementTypeBase>(CSSPseudoElementType::Count)];
   nsTArray<nsCSSRuleProcessor::StateSelector>  mStateSelectors;
   EventStates              mSelectorDocumentStates;
   PLDHashTable             mClassSelectors;
@@ -2645,9 +2645,8 @@ nsCSSRuleProcessor::RulesMatching(PseudoElementRuleProcessorData* aData)
   RuleCascadeData* cascade = GetRuleCascade(aData->mPresContext);
 
   if (cascade) {
-    RuleHash* ruleHash =
-      cascade->mPseudoElementRuleHashes[static_cast<uint8_t>(
-        aData->mPseudoType)];
+    RuleHash* ruleHash = cascade->mPseudoElementRuleHashes[
+      static_cast<CSSPseudoElementTypeBase>(aData->mPseudoType)];
     if (ruleHash) {
       NodeMatchContext nodeContext(EventStates(),
                                    nsCSSRuleProcessor::IsLink(aData->mElement));
@@ -3416,8 +3415,8 @@ AddRule(RuleSelectorPair* aRuleInfo, RuleCascadeData* aCascade)
   if (MOZ_LIKELY(pseudoType == CSSPseudoElementType::NotPseudo)) {
     cascade->mRuleHash.AppendRule(*aRuleInfo);
   } else if (pseudoType < CSSPseudoElementType::Count) {
-    RuleHash*& ruleHash =
-      cascade->mPseudoElementRuleHashes[static_cast<uint8_t>(pseudoType)];
+    RuleHash*& ruleHash = cascade->mPseudoElementRuleHashes[
+      static_cast<CSSPseudoElementTypeBase>(pseudoType)];
     if (!ruleHash) {
       ruleHash = new RuleHash(cascade->mQuirksMode);
       if (!ruleHash) {
diff --git a/layout/style/nsStyleContext.cpp b/layout/style/nsStyleContext.cpp
index 32d842cba0..23327d8d2f 100644
--- a/layout/style/nsStyleContext.cpp
+++ b/layout/style/nsStyleContext.cpp
@@ -90,7 +90,8 @@ nsStyleContext::nsStyleContext(nsStyleContext* aParent,
   // This check has to be done "backward", because if it were written the
   // more natural way it wouldn't fail even when it needed to.
   static_assert((UINT64_MAX >> NS_STYLE_CONTEXT_TYPE_SHIFT) >=
-                 static_cast<uint8_t>(CSSPseudoElementType::MAX),
+                 static_cast<CSSPseudoElementTypeBase>(
+                   CSSPseudoElementType::MAX),
                 "pseudo element bits no longer fit in a uint64_t");
   MOZ_ASSERT(aRuleNode);
 
diff --git a/layout/style/test/test_default_bidi_css.html b/layout/style/test/test_default_bidi_css.html
index 3386a075ea..3da5047500 100644
--- a/layout/style/test/test_default_bidi_css.html
+++ b/layout/style/test/test_default_bidi_css.html
@@ -32,10 +32,10 @@ var tests = [
     ['div', {'dir': ''}, 'ltr', '-moz-isolate'],
 
     ['span', {}, 'ltr', 'normal'],
-    ['span', {'dir': 'ltr'}, 'ltr', 'embed'],
-    ['span', {'dir': 'rtl'}, 'rtl', 'embed'],
+    ['span', {'dir': 'ltr'}, 'ltr', '-moz-isolate'],
+    ['span', {'dir': 'rtl'}, 'rtl', '-moz-isolate'],
     ['span', {'dir': 'auto'}, 'ltr', '-moz-isolate'],
-    ['span', {'dir': ''}, 'ltr', 'normal'],
+    ['span', {'dir': ''}, 'ltr', '-moz-isolate'],
 
     ['bdi', {}, 'ltr', '-moz-isolate'],
     ['bdi', {'dir': 'ltr'}, 'ltr', '-moz-isolate'],
@@ -56,10 +56,10 @@ var tests = [
     ['bdo', {'dir': ''}, 'ltr', 'bidi-override'],
 
     ['textarea', {}, 'ltr', 'normal'],
-    ['textarea', {'dir': 'ltr'}, 'ltr', 'embed'],
-    ['textarea', {'dir': 'rtl'}, 'rtl', 'embed'],
+    ['textarea', {'dir': 'ltr'}, 'ltr', '-moz-isolate'],
+    ['textarea', {'dir': 'rtl'}, 'rtl', '-moz-isolate'],
     ['textarea', {'dir': 'auto'}, 'ltr', '-moz-plaintext'],
-    ['textarea', {'dir': ''}, 'ltr', 'normal'],
+    ['textarea', {'dir': ''}, 'ltr', '-moz-isolate'],
 
     ['pre', {}, 'ltr', '-moz-isolate'],
     ['pre', {'dir': 'ltr'}, 'ltr', '-moz-isolate'],
diff --git a/layout/style/ua.css b/layout/style/ua.css
index ba1b4d5363..fe88b6d7e0 100644
--- a/layout/style/ua.css
+++ b/layout/style/ua.css
@@ -276,7 +276,7 @@
 
 }
 
-*|*:-moz-full-screen:not(:root) {
+*|*:fullscreen:not(:root) {
   position: fixed !important;
   top: 0 !important;
   left: 0 !important;
@@ -296,7 +296,7 @@
 
 /* Selectors here should match the check in
  * nsViewportFrame.cpp:ShouldInTopLayerForFullscreen() */
-*|*:-moz-full-screen:not(:root):not(:-moz-browser-frame) {
+*|*:fullscreen:not(:root):not(:-moz-browser-frame) {
   -moz-top-layer: top !important;
 }
 
diff --git a/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp b/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp
index e6d5bd07eb..e760151eca 100644
--- a/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp
+++ b/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp
@@ -50,16 +50,6 @@ ClearKeySessionManager::~ClearKeySessionManager()
   CK_LOGD("ClearKeySessionManager dtor %p", this);
 }
 
-static bool
-ShouldBeAbleToDecode()
-{
-#if !defined(ENABLE_WMF)
-  return false;
-#else
-  return IsWindowsVistaOrGreater();
-#endif
-}
-
 static bool
 CanDecode()
 {
@@ -75,17 +65,12 @@ ClearKeySessionManager::Init(GMPDecryptorCallback* aCallback)
 {
   CK_LOGD("ClearKeySessionManager::Init");
   mCallback = aCallback;
-  if (ShouldBeAbleToDecode()) {
-    if (!CanDecode()) {
-      const char* err = "EME plugin can't load system decoder!";
-      mCallback->SessionError(nullptr, 0, kGMPAbortError, 0, err, strlen(err));
-    } else {
-      mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AND_DECODE_AUDIO |
-                                 GMP_EME_CAP_DECRYPT_AND_DECODE_VIDEO);
-    }
-  } else {
+  if (!CanDecode()) {
     mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AUDIO |
                                GMP_EME_CAP_DECRYPT_VIDEO);
+  } else {
+    mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AND_DECODE_AUDIO |
+                               GMP_EME_CAP_DECRYPT_AND_DECODE_VIDEO);
   }
   ClearKeyPersistence::EnsureInitialized();
 }
diff --git a/media/libogg/moz.build b/media/libogg/moz.build
index cecda3d824..964e3a0ea5 100644
--- a/media/libogg/moz.build
+++ b/media/libogg/moz.build
@@ -4,6 +4,9 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+with Files('*'):
+    BUG_COMPONENT = ('Core', 'Video/Audio')
+
 EXPORTS.ogg += [
     'include/ogg/config_types.h',
     'include/ogg/ogg.h',
diff --git a/media/libopus/README_MOZILLA b/media/libopus/README_MOZILLA
index 2d85c9f99e..641da75ee2 100644
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -8,4 +8,4 @@ files after the copy step.
 
 The upstream repository is https://git.xiph.org/opus.git
 
-The git tag/revision used was v1.1.
+The git tag/revision used was v1.1.2.
diff --git a/media/libopus/celt/_kiss_fft_guts.h b/media/libopus/celt/_kiss_fft_guts.h
index aefe490e11..5e3d58fd66 100644
--- a/media/libopus/celt/_kiss_fft_guts.h
+++ b/media/libopus/celt/_kiss_fft_guts.h
@@ -65,10 +65,6 @@
       do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
           (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
 
-#   define C_MUL4(m,a,b) \
-      do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
-          (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
-
 #   define C_MULBYSCALAR( c, s ) \
       do{ (c).r =  S_MUL( (c).r , s ) ;\
           (c).i =  S_MUL( (c).i , s ) ; }while(0)
@@ -101,6 +97,9 @@
 #if defined(OPUS_ARM_INLINE_EDSP)
 #include "arm/kiss_fft_armv5e.h"
 #endif
+#if defined(MIPSr1_ASM)
+#include "mips/kiss_fft_mipsr1.h"
+#endif
 
 #else  /* not FIXED_POINT*/
 
diff --git a/media/libopus/celt/arch.h b/media/libopus/celt/arch.h
index 3bbcd3663a..9f74ddd267 100644
--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@@ -69,11 +69,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
 
 #define IMUL32(a,b) ((a)*(b))
 
-#define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
-#define ABS16(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 16-bit value.  */
 #define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
 #define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
-#define ABS32(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 32-bit value.  */
 #define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
 #define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
@@ -108,6 +105,13 @@ typedef opus_val32 celt_ener;
 #define SCALEIN(a)      (a)
 #define SCALEOUT(a)     (a)
 
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
+
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+
 #ifdef FIXED_DEBUG
 #include "fixed_debug.h"
 #else
@@ -137,6 +141,22 @@ typedef float celt_sig;
 typedef float celt_norm;
 typedef float celt_ener;
 
+#ifdef FLOAT_APPROX
+/* This code should reliably detect NaN/inf even when -ffast-math is used.
+   Assumes IEEE 754 format. */
+static OPUS_INLINE int celt_isnan(float x)
+{
+   union {float f; opus_uint32 i;} in;
+   in.f = x;
+   return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
+}
+#else
+#ifdef __FAST_MATH__
+#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
+#endif
+#define celt_isnan(x) ((x)!=(x))
+#endif
+
 #define Q15ONE 1.0f
 
 #define NORM_SCALING 1.f
@@ -146,6 +166,10 @@ typedef float celt_ener;
 #define VERY_LARGE16 1e15f
 #define Q15_ONE ((opus_val16)1.f)
 
+/* This appears to be the same speed as C99's fabsf() but it's more portable. */
+#define ABS16(x) ((float)fabs(x))
+#define ABS32(x) ((float)fabs(x))
+
 #define QCONST16(x,bits) (x)
 #define QCONST32(x,bits) (x)
 
@@ -184,6 +208,7 @@ typedef float celt_ener;
 #define MULT32_32_Q31(a,b)     ((a)*(b))
 
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+#define MAC16_32_Q16(c,a,b)     ((c)+(a)*(b))
 
 #define MULT16_16_Q11_32(a,b)     ((a)*(b))
 #define MULT16_16_Q11(a,b)     ((a)*(b))
@@ -201,6 +226,8 @@ typedef float celt_ener;
 #define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
 #define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
 
+#define SIG2WORD16(x) (x)
+
 #endif /* !FIXED_POINT */
 
 #ifndef GLOBAL_STACK_SIZE
diff --git a/media/libopus/celt/arm/arm2gnu.pl b/media/libopus/celt/arm/arm2gnu.pl
index eab42efa2b..6c922ac819 100755
--- a/media/libopus/celt/arm/arm2gnu.pl
+++ b/media/libopus/celt/arm/arm2gnu.pl
@@ -1,7 +1,33 @@
 #!/usr/bin/perl
+# Copyright (C) 2002-2013 Xiph.org Foundation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 my $bigend;  # little/big endian
 my $nxstack;
+my $apple = 0;
+my $symprefix = "";
 
 $nxstack = 0;
 
@@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
 
 while ($ARGV[0] =~ /^-/) {
     $_ = shift;
-  last if /^--/;
-    if (/^-n/) {
+  last if /^--$/;
+    if (/^-n$/) {
     $nflag++;
     next;
     }
+    if (/^--apple$/) {
+        $apple = 1;
+        $symprefix = "_";
+        next;
+    }
     die "I don't recognize this switch: $_\\n";
 }
 $printit++ unless $nflag;
@@ -25,6 +56,8 @@ $n=0;
 $thumb = 0;     # ARM mode by default, not Thumb.
 @proc_stack = ();
 
+printf ("    .syntax unified\n");
+
 LINE:
 while (<>) {
 
@@ -53,7 +86,7 @@ while (<>) {
     s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
     s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
     s/\bIMPORT\b/.extern/;
-    s/\bEXPORT\b/.global/;
+    s/\bEXPORT\b\s*/.global $symprefix/;
     s/^(\s+)\[/$1IF/;
     s/^(\s+)\|/$1ELSE/;
     s/^(\s+)\]/$1ENDIF/;
@@ -109,7 +142,7 @@ while (<>) {
             # won't match the original source file (we could use the .line
             # directive, which is documented to be obsolete, but then gdb will
             # show the wrong line in the translated source file).
-            s/$/;   .arch armv7-a\n   .fpu neon\n   .object_arch armv4t/;
+            s/$/;   .arch armv7-a\n   .fpu neon\n   .object_arch armv4t/ unless ($apple);
         }
     }
 
@@ -131,9 +164,13 @@ while (<>) {
         $prefix = "";
         if ($proc)
         {
-            $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
+            $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
+            # Make sure we $prefix isn't empty here (for the $apple case).
+            # We handle mangling the label here, make sure it doesn't match
+            # the label handling below (if $prefix would be empty).
+            $prefix = "; ";
             push(@proc_stack, $proc);
-            s/^[A-Za-z_\.]\w+/$&:/;
+            s/^[A-Za-z_\.]\w+/$symprefix$&:/;
         }
         $prefix = $prefix."\t.thumb_func; " if ($thumb);
         s/\bPROC\b/@ $&/;
@@ -146,7 +183,7 @@ while (<>) {
         my $proc;
         s/\bENDP\b/@ $&/;
         $proc = pop(@proc_stack);
-        $_ = "\t.size $proc, .-$proc".$_ if ($proc);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
     }
     s/\bSUBT\b/@ $&/;
     s/\bDATA\b/@ $&/;   # DATA directive is deprecated -- Asm guide, p.7-25
@@ -311,6 +348,6 @@ while (<>) {
 }
 #If we had a code section, mark that this object doesn't need an executable
 # stack.
-if ($nxstack) {
+if ($nxstack && !$apple) {
     printf ("    .section\t.note.GNU-stack,\"\",\%\%progbits\n");
 }
diff --git a/media/libopus/celt/arm/arm_celt_map.c b/media/libopus/celt/arm/arm_celt_map.c
index 547a84d149..ee6c244786 100644
--- a/media/libopus/celt/arm/arm_celt_map.c
+++ b/media/libopus/celt/arm/arm_celt_map.c
@@ -30,6 +30,8 @@
 #endif
 
 #include "pitch.h"
+#include "kiss_fft.h"
+#include "mdct.h"
 
 #if defined(OPUS_HAVE_RTCD)
 
@@ -41,9 +43,79 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
-# else
-#  error "Floating-point implementation is not supported by ARM asm yet." \
- "Reconfigure with --disable-rtcd or send patches."
-# endif
+# else /* !FIXED_POINT */
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int, int) = {
+  celt_pitch_xcorr_c,              /* ARMv4 */
+  celt_pitch_xcorr_c,              /* EDSP */
+  celt_pitch_xcorr_c,              /* Media */
+  celt_pitch_xcorr_float_neon      /* Neon */
+};
+#  endif
+# endif /* FIXED_POINT */
 
-#endif
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(HAVE_ARM_NE10)
+#   if defined(CUSTOM_MODES)
+int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+   opus_fft_alloc_arch_c,        /* ARMv4 */
+   opus_fft_alloc_arch_c,        /* EDSP */
+   opus_fft_alloc_arch_c,        /* Media */
+   opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
+};
+
+void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+   opus_fft_free_arch_c,         /* ARMv4 */
+   opus_fft_free_arch_c,         /* EDSP */
+   opus_fft_free_arch_c,         /* Media */
+   opus_fft_free_arm_neon        /* Neon with NE10 */
+};
+#   endif /* CUSTOM_MODES */
+
+void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+                                        const kiss_fft_cpx *fin,
+                                        kiss_fft_cpx *fout) = {
+   opus_fft_c,                   /* ARMv4 */
+   opus_fft_c,                   /* EDSP */
+   opus_fft_c,                   /* Media */
+   opus_fft_neon                 /* Neon with NE10 */
+};
+
+void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+                                         const kiss_fft_cpx *fin,
+                                         kiss_fft_cpx *fout) = {
+   opus_ifft_c,                   /* ARMv4 */
+   opus_ifft_c,                   /* EDSP */
+   opus_ifft_c,                   /* Media */
+   opus_ifft_neon                 /* Neon with NE10 */
+};
+
+void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+                                                     kiss_fft_scalar *in,
+                                                     kiss_fft_scalar * OPUS_RESTRICT out,
+                                                     const opus_val16 *window,
+                                                     int overlap, int shift,
+                                                     int stride, int arch) = {
+   clt_mdct_forward_c,           /* ARMv4 */
+   clt_mdct_forward_c,           /* EDSP */
+   clt_mdct_forward_c,           /* Media */
+   clt_mdct_forward_neon         /* Neon with NE10 */
+};
+
+void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+                                                      kiss_fft_scalar *in,
+                                                      kiss_fft_scalar * OPUS_RESTRICT out,
+                                                      const opus_val16 *window,
+                                                      int overlap, int shift,
+                                                      int stride, int arch) = {
+   clt_mdct_backward_c,           /* ARMv4 */
+   clt_mdct_backward_c,           /* EDSP */
+   clt_mdct_backward_c,           /* Media */
+   clt_mdct_backward_neon         /* Neon with NE10 */
+};
+
+#  endif /* HAVE_ARM_NE10 */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
+
+#endif /* OPUS_HAVE_RTCD */
diff --git a/media/libopus/celt/arm/armcpu.c b/media/libopus/celt/arm/armcpu.c
index 17685258b1..5e5d10c344 100644
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@@ -73,7 +73,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#   if defined(OPUS_ARM_MAY_HAVE_NEON)
+#   if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*VORR q0,q0,q0*/
     __emit(0xF2200150);
@@ -107,7 +107,7 @@ opus_uint32 opus_cpu_capabilities(void)
 
     while(fgets(buf, 512, cpuinfo) != NULL)
     {
-# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for edsp and neon flag */
       if(memcmp(buf, "Features", 8) == 0)
       {
@@ -118,7 +118,7 @@ opus_uint32 opus_cpu_capabilities(void)
           flags |= OPUS_CPU_ARM_EDSP;
 #  endif
 
-#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
         p = strstr(buf, " neon");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
           flags |= OPUS_CPU_ARM_NEON;
diff --git a/media/libopus/celt/arm/celt_ne10_fft.c b/media/libopus/celt/arm/celt_ne10_fft.c
new file mode 100644
index 0000000000..42d96a7117
--- /dev/null
+++ b/media/libopus/celt/arm/celt_ne10_fft.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_ne10_fft.c
+   @brief ARM Neon optimizations for fft using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include <NE10_init.h>
+#include <NE10_dsp.h>
+#include "os_support.h"
+#include "kiss_fft.h"
+#include "stack_alloc.h"
+
+#if !defined(FIXED_POINT)
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
+#else
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
+#endif
+
+#if defined(CUSTOM_MODES)
+
+/* nfft lengths in NE10 that support scaled fft */
+# define NE10_FFTSCALED_SUPPORT_MAX 4
+static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
+   480, 240, 120, 60
+};
+
+int opus_fft_alloc_arm_neon(kiss_fft_state *st)
+{
+   int i;
+   size_t memneeded = sizeof(struct arch_fft_state);
+
+   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
+   if (!st->arch_fft)
+      return -1;
+
+   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
+      if(st->nfft == ne10_fft_scaled_support[i])
+         break;
+   }
+   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
+      /* This nfft length (scaled fft) is not supported in NE10 */
+      st->arch_fft->is_supported = 0;
+      st->arch_fft->priv = NULL;
+   }
+   else {
+      st->arch_fft->is_supported = 1;
+      st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
+      if (st->arch_fft->priv == NULL) {
+         return -1;
+      }
+   }
+   return 0;
+}
+
+void opus_fft_free_arm_neon(kiss_fft_state *st)
+{
+   NE10_FFT_CFG_TYPE_T cfg;
+
+   if (!st->arch_fft)
+      return;
+
+   cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
+   if (cfg)
+      NE10_FFT_DESTROY_C2C_TYPE(cfg);
+   opus_free(st->arch_fft);
+}
+#endif
+
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_fft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
+      state.is_forward_scaled = 1;
+
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0, 1);
+#endif
+   }
+   RESTORE_STACK;
+}
+
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_ifft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
+      state.is_backward_scaled = 0;
+
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1, 0);
+#endif
+   }
+   RESTORE_STACK;
+}
diff --git a/media/libopus/celt/arm/celt_ne10_mdct.c b/media/libopus/celt/arm/celt_ne10_mdct.c
new file mode 100644
index 0000000000..293c3efd7a
--- /dev/null
+++ b/media/libopus/celt/arm/celt_ne10_mdct.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_ne10_mdct.c
+   @brief ARM Neon optimizations for mdct using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+
+void clt_mdct_forward_neon(const mdct_lookup *l,
+                           kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window,
+                           int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+
+   SAVE_STACK;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         f2[i] = yc;
+      }
+   }
+
+   opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+void clt_mdct_backward_neon(const mdct_lookup *l,
+                            kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 * OPUS_RESTRICT window,
+                            int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   const kiss_twiddle_scalar *trig;
+   const kiss_fft_state *st = l->kfft[shift];
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yp[2*i] = yr;
+         yp[2*i+1] = yi;
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         re = yp1[0];
+         im = yp1[1];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
diff --git a/media/libopus/celt/arm/celt_neon_intr.c b/media/libopus/celt/arm/celt_neon_intr.c
new file mode 100644
index 0000000000..47dce15ba5
--- /dev/null
+++ b/media/libopus/celt/arm/celt_neon_intr.c
@@ -0,0 +1,252 @@
+/* Copyright (c) 2014-2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_neon_intr.c
+   @brief ARM Neon Intrinsic optimizations for celt
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "../pitch.h"
+
+#if !defined(FIXED_POINT)
+/*
+ * Function: xcorr_kernel_neon_float
+ * ---------------------------------
+ * Computes 4 correlation values and stores them in sum[4]
+ */
+static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
+      float32_t sum[4], int len) {
+   float32x4_t YY[3];
+   float32x4_t YEXT[3];
+   float32x4_t XX[2];
+   float32x2_t XX_2;
+   float32x4_t SUMM;
+   const float32_t *xi = x;
+   const float32_t *yi = y;
+
+   celt_assert(len>0);
+
+   YY[0] = vld1q_f32(yi);
+   SUMM = vdupq_n_f32(0);
+
+   /* Consume 8 elements in x vector and 12 elements in y
+    * vector. However, the 12'th element never really gets
+    * touched in this loop. So, if len == 8, then we only
+    * must access y[0] to y[10]. y[11] must not be accessed
+    * hence make sure len > 8 and not len >= 8
+    */
+   while (len > 8) {
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+      YY[2] = vld1q_f32(yi);
+
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
+      YEXT[0] = vextq_f32(YY[0], YY[1], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
+      YEXT[1] = vextq_f32(YY[0], YY[1], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
+      YEXT[2] = vextq_f32(YY[0], YY[1], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
+      YEXT[0] = vextq_f32(YY[1], YY[2], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
+      YEXT[1] = vextq_f32(YY[1], YY[2], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
+      YEXT[2] = vextq_f32(YY[1], YY[2], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
+
+      YY[0] = YY[2];
+      len -= 8;
+   }
+
+   /* Consume 4 elements in x vector and 8 elements in y
+    * vector. However, the 8'th element in y never really gets
+    * touched in this loop. So, if len == 4, then we only
+    * must access y[0] to y[6]. y[7] must not be accessed
+    * hence make sure len>4 and not len>=4
+    */
+   if (len > 4) {
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
+      YEXT[0] = vextq_f32(YY[0], YY[1], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
+      YEXT[1] = vextq_f32(YY[0], YY[1], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
+      YEXT[2] = vextq_f32(YY[0], YY[1], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
+
+      YY[0] = YY[1];
+      len -= 4;
+   }
+
+   while (--len > 0) {
+      XX_2 = vld1_dup_f32(xi++);
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
+      YY[0]= vld1q_f32(++yi);
+   }
+
+   XX_2 = vld1_dup_f32(xi);
+   SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
+
+   vst1q_f32(sum, SUMM);
+}
+
+/*
+ * Function: xcorr_kernel_neon_float_process1
+ * ---------------------------------
+ * Computes single correlation values and stores in *sum
+ */
+static void xcorr_kernel_neon_float_process1(const float32_t *x,
+      const float32_t *y, float32_t *sum, int len) {
+   float32x4_t XX[4];
+   float32x4_t YY[4];
+   float32x2_t XX_2;
+   float32x2_t YY_2;
+   float32x4_t SUMM;
+   float32x2_t SUMM_2[2];
+   const float32_t *xi = x;
+   const float32_t *yi = y;
+
+   SUMM = vdupq_n_f32(0);
+
+   /* Work on 16 values per iteration */
+   while (len >= 16) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+      XX[2] = vld1q_f32(xi);
+      xi += 4;
+      XX[3] = vld1q_f32(xi);
+      xi += 4;
+
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+      YY[2] = vld1q_f32(yi);
+      yi += 4;
+      YY[3] = vld1q_f32(yi);
+      yi += 4;
+
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
+      SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
+      SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
+      len -= 16;
+   }
+
+   /* Work on 8 values */
+   if (len >= 8) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
+      len -= 8;
+   }
+
+   /* Work on 4 values */
+   if (len >= 4) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      len -= 4;
+   }
+
+   /* Start accumulating results */
+   SUMM_2[0] = vget_low_f32(SUMM);
+   if (len >= 2) {
+      /* While at it, consume 2 more values if available */
+      XX_2 = vld1_f32(xi);
+      xi += 2;
+      YY_2 = vld1_f32(yi);
+      yi += 2;
+      SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
+      len -= 2;
+   }
+   SUMM_2[1] = vget_high_f32(SUMM);
+   SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
+   SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
+   /* Ok, now we have result accumulated in SUMM_2[0].0 */
+
+   if (len > 0) {
+      /* Case when you have one value left */
+      XX_2 = vld1_dup_f32(xi);
+      YY_2 = vld1_dup_f32(yi);
+      SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
+   }
+
+   vst1_lane_f32(sum, SUMM_2[0], 0);
+}
+
+void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
+                        opus_val32 *xcorr, int len, int max_pitch) {
+   int i;
+   celt_assert(max_pitch > 0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+
+   for (i = 0; i < (max_pitch-3); i += 4) {
+      xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
+            (float32_t *)xcorr+i, len);
+   }
+
+   /* In case max_pitch isn't multiple of 4
+    * compute single correlation value per iteration
+    */
+   for (; i < max_pitch; i++) {
+      xcorr_kernel_neon_float_process1((const float32_t *)_x,
+            (const float32_t *)_y+i, (float32_t *)xcorr+i, len);
+   }
+}
+#endif
diff --git a/media/libopus/celt/arm/celt_pitch_xcorr_arm.s b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
index 09917b16bf..f96e0a88bb 100644
--- a/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
+++ b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
@@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON
 
 ; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
 xcorr_kernel_neon PROC
+xcorr_kernel_neon_start
   ; input:
   ;   r3     = int         len
   ;   r4     = opus_val16 *x
@@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4
   VEOR         q0, q0, q0
   ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
   ; So we don't save/restore any other registers.
-  BL xcorr_kernel_neon
+  BL xcorr_kernel_neon_start
   SUBS         r6, r6, #4
   VST1.32      {q0}, [r2]!
   ; _y += 4
@@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP
 ; This will get used on ARMv7 devices without NEON, so it has been optimized
 ; to take advantage of dual-issuing where possible.
 xcorr_kernel_edsp PROC
+xcorr_kernel_edsp_start
   ; input:
   ;   r3      = int         len
   ;   r4      = opus_val16 *_x (must be 32-bit aligned)
@@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done
   SUBS         r2, r2, #1         ; j--
   ; Stall
   SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
-  LDRGTH       r14, [r4], #2      ; r14 = *x++
+  LDRHGT       r14, [r4], #2      ; r14 = *x++
   SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
   SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
   SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
@@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done
   SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
   LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
   SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
-  LDRGTH       r12, [r4], #2      ; r12 = *x++
+  LDRHGT       r12, [r4], #2      ; r12 = *x++
   SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
   BLE xcorr_kernel_edsp_done
   SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
@@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done
   SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
   LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
   SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
-  LDRGTH       r14, [r4]          ; r14 = *x
+  LDRHGT       r14, [r4]          ; r14 = *x
   SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
   BLE xcorr_kernel_edsp_done
   SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
@@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4
 celt_pitch_xcorr_edsp_process1u_loop4_done
   ADDS         r12, r12, #4
 celt_pitch_xcorr_edsp_process1u_loop1
-  LDRGEH       r6, [r4], #2
+  LDRHGE       r6, [r4], #2
   ; Stall
   SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
-  SUBGES       r12, r12, #1
-  LDRGTH       r8, [r5], #2
+  SUBSGE       r12, r12, #1
+  LDRHGT       r8, [r5], #2
   BGT celt_pitch_xcorr_edsp_process1u_loop1
   ; Restore _x
   SUB          r4, r4, r3, LSL #1
@@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4
   MOV          r7, #0
   MOV          r8, #0
   MOV          r9, #0
-  BL xcorr_kernel_edsp  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  BL xcorr_kernel_edsp_start  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
   ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
   CMP          r0, r6
   ; _y+=4
@@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1
   ADDS         r12, r12, #1
   ; Stall
   SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
-  LDRGTH       r7, [r4], #2
+  LDRHGT       r7, [r4], #2
   SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
   BLE celt_pitch_xcorr_edsp_process2_done
   LDRH         r9, [r5], #2
@@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done
   SUBGE        r12, r12, #2
   SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
   ADDS         r12, r12, #1
-  LDRGEH       r6, [r4], #2
-  LDRGEH       r8, [r5], #2
+  LDRHGE       r6, [r4], #2
+  LDRHGE       r8, [r5], #2
   ; Stall
   SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
   ; maxcorr = max(maxcorr, sum)
diff --git a/media/libopus/celt/arm/fft_arm.h b/media/libopus/celt/arm/fft_arm.h
new file mode 100644
index 0000000000..0cb55d8e22
--- /dev/null
+++ b/media/libopus/celt/arm/fft_arm.h
@@ -0,0 +1,72 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file fft_arm.h
+   @brief ARM Neon Intrinsic optimizations for fft using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if !defined(FFT_ARM_H)
+#define FFT_ARM_H
+
+#include "config.h"
+#include "kiss_fft.h"
+
+#if defined(HAVE_ARM_NE10)
+
+int opus_fft_alloc_arm_neon(kiss_fft_state *st);
+void opus_fft_free_arm_neon(kiss_fft_state *st);
+
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout);
+
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout);
+
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_FFT (1)
+
+#define opus_fft_alloc_arch(_st, arch) \
+   ((void)(arch), opus_fft_alloc_arm_neon(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+   ((void)(arch), opus_fft_free_arm_neon(_st))
+
+#define opus_fft(_st, _fin, _fout, arch) \
+   ((void)(arch), opus_fft_neon(_st, _fin, _fout))
+
+#define opus_ifft(_st, _fin, _fout, arch) \
+   ((void)(arch), opus_ifft_neon(_st, _fin, _fout))
+
+#endif /* OPUS_HAVE_RTCD */
+
+#endif /* HAVE_ARM_NE10 */
+
+#endif
diff --git a/media/libopus/celt/arm/fixed_armv4.h b/media/libopus/celt/arm/fixed_armv4.h
index b690bc8cea..efb3b1896a 100644
--- a/media/libopus/celt/arm/fixed_armv4.h
+++ b/media/libopus/celt/arm/fixed_armv4.h
@@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
 #undef MAC16_32_Q15
 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
 
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q16
+#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
 #undef MULT32_32_Q31
diff --git a/media/libopus/celt/arm/fixed_armv5e.h b/media/libopus/celt/arm/fixed_armv5e.h
index 1194a7d3ec..36a6321101 100644
--- a/media/libopus/celt/arm/fixed_armv5e.h
+++ b/media/libopus/celt/arm/fixed_armv5e.h
@@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
 }
 #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
 
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q16
+static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q16\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
+
 /** 16x16 multiply-add where the result fits in 32 bits */
 #undef MAC16_16
 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
@@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
 }
 #define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
 
+#ifdef OPUS_ARM_INLINE_MEDIA
+
+#undef SIG2WORD16
+static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
+{
+   celt_sig res;
+   __asm__(
+       "#SIG2WORD16\n\t"
+       "ssat %0, #16, %1, ASR #12\n\t"
+       : "=r"(res)
+       : "r"(x+2048)
+   );
+   return EXTRACT16(res);
+}
+#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
+
+#endif /* OPUS_ARM_INLINE_MEDIA */
+
 #endif
diff --git a/media/libopus/celt/arm/mdct_arm.h b/media/libopus/celt/arm/mdct_arm.h
new file mode 100644
index 0000000000..49cbb44576
--- /dev/null
+++ b/media/libopus/celt/arm/mdct_arm.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file arm_mdct.h
+   @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(MDCT_ARM_H)
+#define MDCT_ARM_H
+
+#include "config.h"
+#include "mdct.h"
+
+#if defined(HAVE_ARM_NE10)
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window, int overlap,
+                           int shift, int stride, int arch);
+
+void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 *window, int overlap,
+                            int shift, int stride, int arch);
+
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_MDCT (1)
+#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
+      clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
+#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
+      clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
+#endif /* OPUS_HAVE_RTCD */
+#endif /* HAVE_ARM_NE10 */
+
+#endif
diff --git a/media/libopus/celt/arm/pitch_arm.h b/media/libopus/celt/arm/pitch_arm.h
index a07f8ac2fa..8626ed75b9 100644
--- a/media/libopus/celt/arm/pitch_arm.h
+++ b/media/libopus/celt/arm/pitch_arm.h
@@ -52,6 +52,17 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
   ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
 #  endif
 
-# endif
-
+#else /* Start !FIXED_POINT */
+/* Float case */
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
+                                 opus_val32 *xcorr, int len, int max_pitch);
+#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
+#define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+   ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
+#endif
+#endif
+
+#endif /* end !FIXED_POINT */
 #endif
diff --git a/media/libopus/celt/bands.c b/media/libopus/celt/bands.c
index cce56e2f6e..25f229e267 100644
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@@ -92,11 +92,11 @@ static int bitexact_log2tan(int isin,int icos)
 
 #ifdef FIXED_POINT
 /* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
 {
    int i, c, N;
    const opus_int16 *eBands = m->eBands;
-   N = M*m->shortMdctSize;
+   N = m->shortMdctSize<<LM;
    c=0; do {
       for (i=0;i<end;i++)
       {
@@ -104,18 +104,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
          opus_val32 maxval=0;
          opus_val32 sum = 0;
 
-         j=M*eBands[i]; do {
-            maxval = MAX32(maxval, X[j+c*N]);
-            maxval = MAX32(maxval, -X[j+c*N]);
-         } while (++j<M*eBands[i+1]);
-
+         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
          if (maxval > 0)
          {
-            int shift = celt_ilog2(maxval)-10;
-            j=M*eBands[i]; do {
-               sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
-                                   EXTRACT16(VSHR32(X[j+c*N],shift)));
-            } while (++j<M*eBands[i+1]);
+            int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
+            j=eBands[i]<<LM;
+            if (shift>0)
+            {
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
+                        EXTRACT16(SHR32(X[j+c*N],shift)));
+               } while (++j<eBands[i+1]<<LM);
+            } else {
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
+                        EXTRACT16(SHL32(X[j+c*N],-shift)));
+               } while (++j<eBands[i+1]<<LM);
+            }
             /* We're adding one here to ensure the normalized band isn't larger than unity norm */
             bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
          } else {
@@ -150,18 +155,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 
 #else /* FIXED_POINT */
 /* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
 {
    int i, c, N;
    const opus_int16 *eBands = m->eBands;
-   N = M*m->shortMdctSize;
+   N = m->shortMdctSize<<LM;
    c=0; do {
       for (i=0;i<end;i++)
       {
-         int j;
-         opus_val32 sum = 1e-27f;
-         for (j=M*eBands[i];j<M*eBands[i+1];j++)
-            sum += X[j+c*N]*X[j+c*N];
+         opus_val32 sum;
+         sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
          bandE[i+c*m->nbEBands] = celt_sqrt(sum);
          /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
       }
@@ -190,74 +193,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 
 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
-      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
+      int end, int M, int downsample, int silence)
 {
-   int i, c, N;
+   int i, N;
+   int bound;
+   celt_sig * OPUS_RESTRICT f;
+   const celt_norm * OPUS_RESTRICT x;
    const opus_int16 *eBands = m->eBands;
    N = M*m->shortMdctSize;
-   celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
-   c=0; do {
-      celt_sig * OPUS_RESTRICT f;
-      const celt_norm * OPUS_RESTRICT x;
-      f = freq+c*N;
-      x = X+c*N+M*eBands[start];
-      for (i=0;i<M*eBands[start];i++)
-         *f++ = 0;
-      for (i=start;i<end;i++)
-      {
-         int j, band_end;
-         opus_val16 g;
-         opus_val16 lg;
+   bound = M*eBands[end];
+   if (downsample!=1)
+      bound = IMIN(bound, N/downsample);
+   if (silence)
+   {
+      bound = 0;
+      start = end = 0;
+   }
+   f = freq;
+   x = X+M*eBands[start];
+   for (i=0;i<M*eBands[start];i++)
+      *f++ = 0;
+   for (i=start;i<end;i++)
+   {
+      int j, band_end;
+      opus_val16 g;
+      opus_val16 lg;
 #ifdef FIXED_POINT
-         int shift;
+      int shift;
 #endif
-         j=M*eBands[i];
-         band_end = M*eBands[i+1];
-         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+      j=M*eBands[i];
+      band_end = M*eBands[i+1];
+      lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
 #ifndef FIXED_POINT
-         g = celt_exp2(lg);
+      g = celt_exp2(lg);
 #else
-         /* Handle the integer part of the log energy */
-         shift = 16-(lg>>DB_SHIFT);
-         if (shift>31)
-         {
-            shift=0;
-            g=0;
-         } else {
-            /* Handle the fractional part. */
-            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
-         }
-         /* Handle extreme gains with negative shift. */
-         if (shift<0)
-         {
-            /* For shift < -2 we'd be likely to overflow, so we're capping
+      /* Handle the integer part of the log energy */
+      shift = 16-(lg>>DB_SHIFT);
+      if (shift>31)
+      {
+         shift=0;
+         g=0;
+      } else {
+         /* Handle the fractional part. */
+         g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+      }
+      /* Handle extreme gains with negative shift. */
+      if (shift<0)
+      {
+         /* For shift < -2 we'd be likely to overflow, so we're capping
                the gain here. This shouldn't happen unless the bitstream is
                already corrupted. */
-            if (shift < -2)
-            {
-               g = 32767;
-               shift = -2;
-            }
-            do {
-               *f++ = SHL32(MULT16_16(*x++, g), -shift);
-            } while (++j<band_end);
-         } else
+         if (shift < -2)
+         {
+            g = 32767;
+            shift = -2;
+         }
+         do {
+            *f++ = SHL32(MULT16_16(*x++, g), -shift);
+         } while (++j<band_end);
+      } else
 #endif
          /* Be careful of the fixed-point "else" just above when changing this code */
          do {
             *f++ = SHR32(MULT16_16(*x++, g), shift);
          } while (++j<band_end);
-      }
-      celt_assert(start <= end);
-      for (i=M*eBands[end];i<N;i++)
-         *f++ = 0;
-   } while (++c<C);
+   }
+   celt_assert(start <= end);
+   OPUS_CLEAR(&freq[bound], N-bound);
 }
 
 /* This prevents energy collapse for transients with multiple short MDCTs */
 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
-      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
-      opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+      int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
 {
    int c, i, j, k;
    for (i=start;i<end;i++)
@@ -272,7 +281,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
 
       N0 = m->eBands[i+1]-m->eBands[i];
       /* depth in 1/8 bits */
-      depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+      celt_assert(pulses[i]>=0);
+      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
 
 #ifdef FIXED_POINT
       thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
@@ -345,12 +355,12 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
          }
          /* We just added some energy, so we need to renormalise */
          if (renormalize)
-            renormalise_vector(X, N0<<LM, Q15ONE);
+            renormalise_vector(X, N0<<LM, Q15ONE, arch);
       } while (++c<C);
    }
 }
 
-static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
 {
    int i = bandID;
    int j;
@@ -370,25 +380,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons
       celt_norm r, l;
       l = X[j];
       r = Y[j];
-      X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+      X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
       /* Side is not encoded, no need to calculate */
    }
 }
 
-static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
 {
    int j;
    for (j=0;j<N;j++)
    {
-      celt_norm r, l;
-      l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
-      r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
-      X[j] = l+r;
-      Y[j] = r-l;
+      opus_val32 r, l;
+      l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
+      r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
+      X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
+      Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
    }
 }
 
-static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
 {
    int j;
    opus_val32 xp=0, side=0;
@@ -400,7 +410,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
    opus_val32 t, lgain, rgain;
 
    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
-   dual_inner_prod(Y, X, Y, N, &xp, &side);
+   dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
    /* Compensating for the mid normalization */
    xp = MULT16_32_Q15(mid, xp);
    /* mid and side are in Q15, not Q14 like X and Y */
@@ -409,8 +419,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
    {
-      for (j=0;j<N;j++)
-         Y[j] = X[j];
+      OPUS_COPY(Y, X, N);
       return;
    }
 
@@ -434,7 +443,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
    {
       celt_norm r, l;
       /* Apply mid scaling (side is already scaled) */
-      l = MULT16_16_Q15(mid, X[j]);
+      l = MULT16_16_P15(mid, X[j]);
       r = Y[j];
       X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
       Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
@@ -442,7 +451,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
 }
 
 /* Decide whether we should spread the pulses in the current frame */
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
       int end, int C, int M)
 {
@@ -463,7 +472,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
       {
          int j, N, tmp=0;
          int tcount[3] = {0,0,0};
-         celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
          N = M*(eBands[i+1]-eBands[i]);
          if (N<=8)
             continue;
@@ -483,7 +492,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
 
          /* Only include four last bands (8 kHz and up) */
          if (i>m->nbEBands-4)
-            hf_sum += 32*(tcount[1]+tcount[0])/N;
+            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
          tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
          sum += tmp*256;
          nbBands++;
@@ -493,7 +502,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
    if (update_hf)
    {
       if (hf_sum)
-         hf_sum /= C*(4-m->nbEBands+end);
+         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
       *hf_average = (*hf_average+hf_sum)>>1;
       hf_sum = *hf_average;
       if (*tapset_decision==2)
@@ -509,7 +518,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
    }
    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
    celt_assert(nbBands>0); /* end has to be non-zero */
-   sum /= nbBands;
+   celt_assert(sum>=0);
+   sum = celt_udiv(sum, nbBands);
    /* Recursive averaging */
    sum = (sum+*average)>>1;
    *average = sum;
@@ -567,8 +577,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard
          for (j=0;j<N0;j++)
             tmp[i*N0+j] = X[j*stride+i];
    }
-   for (j=0;j<N;j++)
-      X[j] = tmp[j];
+   OPUS_COPY(X, tmp, N);
    RESTORE_STACK;
 }
 
@@ -591,8 +600,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
          for (j=0;j<N0;j++)
             tmp[j*stride+i] = X[i*N0+j];
    }
-   for (j=0;j<N;j++)
-      X[j] = tmp[j];
+   OPUS_COPY(X, tmp, N);
    RESTORE_STACK;
 }
 
@@ -603,11 +611,11 @@ void haar1(celt_norm *X, int N0, int stride)
    for (i=0;i<stride;i++)
       for (j=0;j<N0;j++)
       {
-         celt_norm tmp1, tmp2;
-         tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
-         tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
-         X[stride*2*j+i] = tmp1 + tmp2;
-         X[stride*(2*j+1)+i] = tmp1 - tmp2;
+         opus_val32 tmp1, tmp2;
+         tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
+         tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+         X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
+         X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
       }
 }
 
@@ -622,7 +630,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
    /* The upper limit ensures that in a stereo split with itheta==16384, we'll
        always have enough bits left over to code at least one pulse in the
        side; otherwise it would collapse, since it doesn't get folded. */
-   qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+   qb = celt_sudiv(b+N2*offset, N2);
+   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
 
    qb = IMIN(8<<BITRES, qb);
 
@@ -647,6 +656,7 @@ struct band_ctx {
    opus_int32 remaining_bits;
    const celt_ener *bandE;
    opus_uint32 seed;
+   int arch;
 };
 
 struct split_ctx {
@@ -698,7 +708,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
          side and mid. With just that parameter, we can re-scale both
          mid and side because we know that 1) they have unit norm and
          2) they are orthogonal. */
-      itheta = stereo_itheta(X, Y, stereo, N);
+      itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
    }
    tell = ec_tell_frac(ec);
    if (qn!=1)
@@ -769,7 +779,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
             ec_dec_update(ec, fl, fl+fs, ft);
          }
       }
-      itheta = (opus_int32)itheta*16384/qn;
+      celt_assert(itheta>=0);
+      itheta = celt_udiv((opus_int32)itheta*16384, qn);
       if (encode && stereo)
       {
          if (itheta==0)
@@ -1021,8 +1032,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
             fill &= cm_mask;
             if (!fill)
             {
-               for (j=0;j<N;j++)
-                  X[j] = 0;
+               OPUS_CLEAR(X, N);
             } else {
                if (lowband == NULL)
                {
@@ -1046,7 +1056,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
                   }
                   cm = fill;
                }
-               renormalise_vector(X, N, gain);
+               renormalise_vector(X, N, gain, ctx->arch);
             }
          }
       }
@@ -1084,7 +1094,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
 
    longBlocks = B0==1;
 
-   N_B /= B;
+   N_B = celt_udiv(N_B, B);
 
    /* Special case for one sample */
    if (N==1)
@@ -1098,9 +1108,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
 
    if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
    {
-      int j;
-      for (j=0;j<N;j++)
-         lowband_scratch[j] = lowband[j];
+      OPUS_COPY(lowband_scratch, lowband, N);
       lowband = lowband_scratch;
    }
 
@@ -1340,7 +1348,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
    if (resynth)
    {
       if (N!=2)
-         stereo_merge(X, Y, mid, N);
+         stereo_merge(X, Y, mid, N, ctx->arch);
       if (inv)
       {
          int j;
@@ -1353,9 +1361,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
 
 
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
-      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
-      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
-      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
+      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
+      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
+      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
+      opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
+      opus_uint32 *seed, int arch)
 {
    int i;
    opus_int32 remaining_bits;
@@ -1397,6 +1407,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
    ctx.m = m;
    ctx.seed = *seed;
    ctx.spread = spread;
+   ctx.arch = arch;
    for (i=start;i<end;i++)
    {
       opus_int32 tell;
@@ -1428,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       ctx.remaining_bits = remaining_bits;
       if (i <= codedBands-1)
       {
-         curr_balance = balance / IMIN(3, codedBands-i);
+         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
          b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
       } else {
          b = 0;
diff --git a/media/libopus/celt/bands.h b/media/libopus/celt/bands.h
index 96ba52a649..e8bef4bad0 100644
--- a/media/libopus/celt/bands.h
+++ b/media/libopus/celt/bands.h
@@ -41,7 +41,7 @@
  * @param X Spectrum
  * @param bandE Square root of the energy for each band (returned)
  */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
 
 /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
 
@@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
  * @param bandE Square root of the energy for each band
  */
 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
-      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
+      int end, int M, int downsample, int silence);
 
 #define SPREAD_NONE       (0)
 #define SPREAD_LIGHT      (1)
 #define SPREAD_NORMAL     (2)
 #define SPREAD_AGGRESSIVE (3)
 
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
       int end, int C, int M);
 
@@ -97,15 +98,20 @@ void haar1(celt_norm *X, int N0, int stride);
  * @param LM log2() of the number of 2.5 subframes in the frame
  * @param codedBands Last band to receive bits + 1
  * @param seed Random generator seed
+ * @param arch Run-time architecture (see opus_select_arch())
  */
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
-      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
-      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
-      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
+      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
+      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
+      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
+      opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
+      int arch);
 
-void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
-      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
-      opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
+void anti_collapse(const CELTMode *m, celt_norm *X_,
+      unsigned char *collapse_masks, int LM, int C, int size, int start,
+      int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
+      int arch);
 
 opus_uint32 celt_lcg_rand(opus_uint32 seed);
 
diff --git a/media/libopus/celt/celt.c b/media/libopus/celt/celt.c
index 3e0ce6e6a5..b121c51a1f 100644
--- a/media/libopus/celt/celt.c
+++ b/media/libopus/celt/celt.c
@@ -54,6 +54,10 @@
 #define PACKAGE_VERSION "unknown"
 #endif
 
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
+
 
 int resampling_factor(opus_int32 rate)
 {
@@ -85,8 +89,71 @@ int resampling_factor(opus_int32 rate)
    return ret;
 }
 
-#ifndef OVERRIDE_COMB_FILTER_CONST
-static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
+/* This version should be faster on ARM */
+#ifdef OPUS_ARM_ASM
+#ifndef NON_STATIC_COMB_FILTER_CONST_C
+static
+#endif
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = SHL32(x[-T-2], 1);
+   x3 = SHL32(x[-T-1], 1);
+   x2 = SHL32(x[-T], 1);
+   x1 = SHL32(x[-T+1], 1);
+   for (i=0;i<N-4;i+=5)
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1);
+      t = MAC16_32_Q16(x[i], g10, x2);
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      y[i] = t;
+      x4=SHL32(x[i-T+3],1);
+      t = MAC16_32_Q16(x[i+1], g10, x1);
+      t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
+      t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
+      y[i+1] = t;
+      x3=SHL32(x[i-T+4],1);
+      t = MAC16_32_Q16(x[i+2], g10, x0);
+      t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
+      t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
+      y[i+2] = t;
+      x2=SHL32(x[i-T+5],1);
+      t = MAC16_32_Q16(x[i+3], g10, x4);
+      t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
+      t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
+      y[i+3] = t;
+      x1=SHL32(x[i-T+6],1);
+      t = MAC16_32_Q16(x[i+4], g10, x3);
+      t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
+      t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
+      y[i+4] = t;
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1);
+      t = MAC16_32_Q16(x[i], g10, x2);
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      y[i] = t;
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+#endif
+}
+#else
+#ifndef NON_STATIC_COMB_FILTER_CONST_C
+static
+#endif
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
       opus_val16 g10, opus_val16 g11, opus_val16 g12)
 {
    opus_val32 x0, x1, x2, x3, x4;
@@ -110,10 +177,12 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
 
 }
 #endif
+#endif
 
+#ifndef OVERRIDE_comb_filter
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
-      const opus_val16 *window, int overlap)
+      const opus_val16 *window, int overlap, int arch)
 {
    int i;
    /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
@@ -131,16 +200,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
          OPUS_MOVE(y, x, N);
       return;
    }
-   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
-   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
-   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
-   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
-   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
-   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_P15(g1, gains[tapset1][2]);
    x1 = x[-T1+1];
    x2 = x[-T1  ];
    x3 = x[-T1-1];
    x4 = x[-T1-2];
+   /* If the filter didn't change, we don't need the overlap */
+   if (g0==g1 && T0==T1 && tapset0==tapset1)
+      overlap=0;
    for (i=0;i<overlap;i++)
    {
       opus_val16 f;
@@ -168,8 +240,9 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
    }
 
    /* Compute the part with the constant filter. */
-   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
 }
+#endif /* OVERRIDE_comb_filter */
 
 const signed char tf_select_table[4][8] = {
       {0, -1, 0, -1,    0,-1, 0,-1},
@@ -213,6 +286,9 @@ const char *opus_strerror(int error)
 const char *opus_get_version_string(void)
 {
     return "libopus " PACKAGE_VERSION
+    /* Applications may rely on the presence of this substring in the version
+       string to determine if they have a fixed-point or floating-point build
+       at runtime. */
 #ifdef FIXED_POINT
           "-fixed"
 #endif
diff --git a/media/libopus/celt/celt.h b/media/libopus/celt/celt.h
index 5deea1f0aa..a423b95046 100644
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -134,7 +134,8 @@ int celt_decoder_get_size(int channels);
 
 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
 
-int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
 
 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
@@ -200,15 +201,25 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
 
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
-      const opus_val16 *window, int overlap);
+      const opus_val16 *window, int overlap, int arch);
+
+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+                         opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch)		\
+    ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
+#endif
 
 void init_caps(const CELTMode *m,int *cap,int LM,int C);
 
 #ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
-
-void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
-      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+      opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+      int LM, int downsample, int silence);
 #endif
 
 #ifdef __cplusplus
diff --git a/media/libopus/celt/celt_decoder.c b/media/libopus/celt/celt_decoder.c
index 830398eed8..b688f2a4e3 100644
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -51,6 +51,9 @@
 #include "celt_lpc.h"
 #include "vq.h"
 
+#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
+#define NORM_ALIASING_HACK
+#endif
 /**********************************************************************/
 /*                                                                    */
 /*                             DECODER                                */
@@ -175,28 +178,24 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
 }
 #endif /* CUSTOM_MODES */
 
-static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
-{
-#ifdef FIXED_POINT
-   x = PSHR32(x, SIG_SHIFT);
-   x = MAX32(x, -32768);
-   x = MIN32(x, 32767);
-   return EXTRACT16(x);
-#else
-   return (opus_val16)x;
-#endif
-}
 
 #ifndef RESYNTH
 static
 #endif
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+      celt_sig *mem, int accum)
 {
    int c;
    int Nd;
    int apply_downsampling=0;
    opus_val16 coef0;
-
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+#ifndef FIXED_POINT
+   (void)accum;
+   celt_assert(accum==0);
+#endif
+   ALLOC(scratch, N, celt_sig);
    coef0 = coef[0];
    Nd = N/downsample;
    c=0; do {
@@ -234,11 +233,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
          apply_downsampling=1;
       } else {
          /* Shortcut for the standard (non-custom modes) case */
-         for (j=0;j<N;j++)
+#ifdef FIXED_POINT
+         if (accum)
          {
-            celt_sig tmp = x[j] + m + VERY_SMALL;
-            m = MULT16_32_Q15(coef0, tmp);
-            y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+            for (j=0;j<N;j++)
+            {
+               celt_sig tmp = x[j] + m + VERY_SMALL;
+               m = MULT16_32_Q15(coef0, tmp);
+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
+            }
+         } else
+#endif
+         {
+            for (j=0;j<N;j++)
+            {
+               celt_sig tmp = x[j] + m + VERY_SMALL;
+               m = MULT16_32_Q15(coef0, tmp);
+               y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+            }
          }
       }
       mem[c] = m;
@@ -246,41 +258,95 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
       if (apply_downsampling)
       {
          /* Perform down-sampling */
-         for (j=0;j<Nd;j++)
-            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+#ifdef FIXED_POINT
+         if (accum)
+         {
+            for (j=0;j<Nd;j++)
+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
+         } else
+#endif
+         {
+            for (j=0;j<Nd;j++)
+               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+         }
       }
    } while (++c<C);
+   RESTORE_STACK;
 }
 
-/** Compute the IMDCT and apply window for all sub-frames and
-    all channels in a frame */
 #ifndef RESYNTH
 static
 #endif
-void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
-      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+                    opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
+                    int isTransient, int LM, int downsample,
+                    int silence, int arch)
 {
-   int b, c;
+   int c, i;
+   int M;
+   int b;
    int B;
-   int N;
+   int N, NB;
    int shift;
-   const int overlap = OVERLAP(mode);
+   int nbEBands;
+   int overlap;
+   VARDECL(celt_sig, freq);
+   SAVE_STACK;
 
-   if (shortBlocks)
+   overlap = mode->overlap;
+   nbEBands = mode->nbEBands;
+   N = mode->shortMdctSize<<LM;
+   ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
+   M = 1<<LM;
+
+   if (isTransient)
    {
-      B = shortBlocks;
-      N = mode->shortMdctSize;
+      B = M;
+      NB = mode->shortMdctSize;
       shift = mode->maxLM;
    } else {
       B = 1;
-      N = mode->shortMdctSize<<LM;
+      NB = mode->shortMdctSize<<LM;
       shift = mode->maxLM-LM;
    }
-   c=0; do {
-      /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
+
+   if (CC==2&&C==1)
+   {
+      /* Copying a mono streams to two channels */
+      celt_sig *freq2;
+      denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+            downsample, silence);
+      /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
+      freq2 = out_syn[1]+overlap/2;
+      OPUS_COPY(freq2, freq, N);
       for (b=0;b<B;b++)
-         clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
-   } while (++c<C);
+         clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch);
+   } else if (CC==1&&C==2)
+   {
+      /* Downmixing a stereo stream to mono */
+      celt_sig *freq2;
+      freq2 = out_syn[0]+overlap/2;
+      denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+            downsample, silence);
+      /* Use the output buffer as temp array before downmixing. */
+      denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
+            downsample, silence);
+      for (i=0;i<N;i++)
+         freq[i] = HALF32(ADD32(freq[i],freq2[i]));
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
+   } else {
+      /* Normal case (mono or stereo) */
+      c=0; do {
+         denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
+               downsample, silence);
+         for (b=0;b<B;b++)
+            clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
+      } while (++c<CC);
+   }
+   RESTORE_STACK;
 }
 
 static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@@ -330,7 +396,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
    pitch of 480 Hz. */
 #define PLC_PITCH_LAG_MIN (100)
 
-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
+{
+   int pitch_index;
+   VARDECL( opus_val16, lp_pitch_buf );
+   SAVE_STACK;
+   ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+   pitch_downsample(decode_mem, lp_pitch_buf,
+         DECODE_BUFFER_SIZE, C, arch);
+   pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+         DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+         PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
+   pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+   RESTORE_STACK;
+   return pitch_index;
+}
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
 {
    int c;
    int i;
@@ -343,11 +425,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
    int nbEBands;
    int overlap;
    int start;
-   int downsample;
    int loss_count;
    int noise_based;
    const opus_int16 *eBands;
-   VARDECL(celt_sig, scratch);
    SAVE_STACK;
 
    mode = st->mode;
@@ -367,40 +447,37 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
 
    loss_count = st->loss_count;
    start = st->start;
-   downsample = st->downsample;
    noise_based = loss_count >= 5 || start != 0;
-   ALLOC(scratch, noise_based?N*C:N, celt_sig);
    if (noise_based)
    {
       /* Noise-based PLC/CNG */
-      celt_sig *freq;
+#ifdef NORM_ALIASING_HACK
+      celt_norm *X;
+#else
       VARDECL(celt_norm, X);
+#endif
       opus_uint32 seed;
-      opus_val16 *plcLogE;
       int end;
       int effEnd;
-
+      opus_val16 decay;
       end = st->end;
       effEnd = IMAX(start, IMIN(end, mode->effEBands));
 
-      /* Share the interleaved signal MDCT coefficient buffer with the
-         deemphasis scratch buffer. */
-      freq = scratch;
+#ifdef NORM_ALIASING_HACK
+      /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+         but it saves almost 4kB of stack. */
+      X = (celt_norm*)(out_syn[C-1]+overlap/2);
+#else
       ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+#endif
 
-      if (loss_count >= 5)
-         plcLogE = backgroundLogE;
-      else {
-         /* Energy decay */
-         opus_val16 decay = loss_count==0 ?
-               QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
-         c=0; do
-         {
-            for (i=start;i<end;i++)
-               oldBandE[c*nbEBands+i] -= decay;
-         } while (++c<C);
-         plcLogE = oldBandE;
-      }
+      /* Energy decay */
+      decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+      c=0; do
+      {
+         for (i=start;i<end;i++)
+            oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
+      } while (++c<C);
       seed = st->rng;
       for (c=0;c<C;c++)
       {
@@ -416,25 +493,17 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
                seed = celt_lcg_rand(seed);
                X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
             }
-            renormalise_vector(X+boffs, blen, Q15ONE);
+            renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
          }
       }
       st->rng = seed;
 
-      denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
-
-      c=0; do {
-         int bound = eBands[effEnd]<<LM;
-         if (downsample!=1)
-            bound = IMIN(bound, N/downsample);
-         for (i=bound;i<N;i++)
-            freq[c*N+i] = 0;
-      } while (++c<C);
       c=0; do {
          OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
                DECODE_BUFFER_SIZE-N+(overlap>>1));
       } while (++c<C);
-      compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+
+      celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
    } else {
       /* Pitch-based PLC */
       const opus_val16 *window;
@@ -445,15 +514,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
 
       if (loss_count == 0)
       {
-         VARDECL( opus_val16, lp_pitch_buf );
-         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
-         pitch_downsample(decode_mem, lp_pitch_buf,
-               DECODE_BUFFER_SIZE, C, st->arch);
-         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
-               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
-               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
-         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
-         st->last_pitch_index = pitch_index;
+         st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
       } else {
          pitch_index = st->last_pitch_index;
          fade = QCONST16(.8f,15);
@@ -516,7 +577,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
             }
             /* Compute the excitation for exc_length samples before the loss. */
             celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
-                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
          }
 
          /* Check if the waveform is decaying, and if so how fast.
@@ -583,7 +644,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
                the signal domain. */
             celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
                   buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
-                  lpc_mem);
+                  lpc_mem, st->arch);
          }
 
          /* Check if the synthesis energy is higher than expected, which can
@@ -631,7 +692,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
          comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
               st->postfilter_period, st->postfilter_period, overlap,
               -st->postfilter_gain, -st->postfilter_gain,
-              st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
 
          /* Simulate TDAC on the concealed audio so that it blends with the
             MDCT of the next frame. */
@@ -644,22 +705,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
       } while (++c<C);
    }
 
-   deemphasis(out_syn, pcm, N, C, downsample,
-         mode->preemph, st->preemph_memD, scratch);
-
    st->loss_count = loss_count+1;
 
    RESTORE_STACK;
 }
 
-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
 {
    int c, i, N;
    int spread_decision;
    opus_int32 bits;
    ec_dec _dec;
-   VARDECL(celt_sig, freq);
+#ifdef NORM_ALIASING_HACK
+   celt_norm *X;
+#else
    VARDECL(celt_norm, X);
+#endif
    VARDECL(int, fine_quant);
    VARDECL(int, pulses);
    VARDECL(int, cap);
@@ -677,6 +739,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    int intra_ener;
    const int CC = st->channels;
    int LM, M;
+   int start;
+   int end;
    int effEnd;
    int codedBands;
    int alloc_trim;
@@ -703,11 +767,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
+   start = st->start;
+   end = st->end;
    frame_size *= st->downsample;
 
-   c=0; do {
-      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
-   } while (++c<CC);
    lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
    oldBandE = lpc+CC*LPC_ORDER;
    oldLogE = oldBandE + 2*nbEBands;
@@ -725,7 +788,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
          if (data0<0)
             return OPUS_INVALID_PACKET;
       }
-      st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+      st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
       LM = (data0>>3)&0x3;
       C = 1 + ((data0>>2)&0x1);
       data++;
@@ -752,14 +815,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
       return OPUS_BAD_ARG;
 
    N = M*mode->shortMdctSize;
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<CC);
 
-   effEnd = st->end;
+   effEnd = end;
    if (effEnd > mode->effEBands)
       effEnd = mode->effEBands;
 
    if (data == NULL || len<=1)
    {
-      celt_decode_lost(st, pcm, N, LM);
+      celt_decode_lost(st, N, LM);
+      deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
       RESTORE_STACK;
       return frame_size/st->downsample;
    }
@@ -795,7 +863,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    postfilter_gain = 0;
    postfilter_pitch = 0;
    postfilter_tapset = 0;
-   if (st->start==0 && tell+16 <= total_bits)
+   if (start==0 && tell+16 <= total_bits)
    {
       if(ec_dec_bit_logp(dec, 1))
       {
@@ -826,11 +894,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    /* Decode the global flags (first symbols in the stream) */
    intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
    /* Get band energies */
-   unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+   unquant_coarse_energy(mode, start, end, oldBandE,
          intra_ener, dec, C, LM);
 
    ALLOC(tf_res, nbEBands, int);
-   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+   tf_decode(start, end, isTransient, tf_res, LM, dec);
 
    tell = ec_tell(dec);
    spread_decision = SPREAD_NORMAL;
@@ -846,7 +914,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    dynalloc_logp = 6;
    total_bits<<=BITRES;
    tell = ec_tell_frac(dec);
-   for (i=st->start;i<st->end;i++)
+   for (i=start;i<end;i++)
    {
       int width, quanta;
       int dynalloc_loop_logp;
@@ -885,84 +953,62 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    ALLOC(pulses, nbEBands, int);
    ALLOC(fine_priority, nbEBands, int);
 
-   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+   codedBands = compute_allocation(mode, start, end, offsets, cap,
          alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
          fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
 
-   unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+   unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
+
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
 
    /* Decode fixed codebook */
    ALLOC(collapse_masks, C*nbEBands, unsigned char);
-   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
 
-   quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+#ifdef NORM_ALIASING_HACK
+   /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+      but it saves almost 4kB of stack. */
+   X = (celt_norm*)(out_syn[CC-1]+overlap/2);
+#else
+   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+#endif
+
+   quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
          NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
-         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch);
 
    if (anti_collapse_rsv > 0)
    {
       anti_collapse_on = ec_dec_bits(dec, 1);
    }
 
-   unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+   unquant_energy_finalise(mode, start, end, oldBandE,
          fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
 
    if (anti_collapse_on)
       anti_collapse(mode, X, collapse_masks, LM, C, N,
-            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-
-   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
+            start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
 
    if (silence)
    {
       for (i=0;i<C*nbEBands;i++)
          oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
-      for (i=0;i<C*N;i++)
-         freq[i] = 0;
-   } else {
-      /* Synthesis */
-      denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
-   }
-   c=0; do {
-      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
-   } while (++c<CC);
-
-   c=0; do {
-      int bound = M*eBands[effEnd];
-      if (st->downsample!=1)
-         bound = IMIN(bound, N/st->downsample);
-      for (i=bound;i<N;i++)
-         freq[c*N+i] = 0;
-   } while (++c<C);
-
-   c=0; do {
-      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
-   } while (++c<CC);
-
-   if (CC==2&&C==1)
-   {
-      for (i=0;i<N;i++)
-         freq[N+i] = freq[i];
-   }
-   if (CC==1&&C==2)
-   {
-      for (i=0;i<N;i++)
-         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
    }
 
-   /* Compute inverse MDCTs */
-   compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+   celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
+                  C, CC, isTransient, LM, st->downsample, silence, st->arch);
 
    c=0; do {
       st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
       st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
       comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
             st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
-            mode->window, overlap);
+            mode->window, overlap, st->arch);
       if (LM!=0)
          comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
                st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
-               mode->window, overlap);
+               mode->window, overlap, st->arch);
 
    } while (++c<CC);
    st->postfilter_period_old = st->postfilter_period;
@@ -978,32 +1024,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
       st->postfilter_tapset_old = st->postfilter_tapset;
    }
 
-   if (C==1) {
-      for (i=0;i<nbEBands;i++)
-         oldBandE[nbEBands+i]=oldBandE[i];
-   }
+   if (C==1)
+      OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
 
    /* In case start or end were to change */
    if (!isTransient)
    {
+      opus_val16 max_background_increase;
+      OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
+      OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
+      /* In normal circumstances, we only allow the noise floor to increase by
+         up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB
+         increase for each update.*/
+      if (st->loss_count < 10)
+         max_background_increase = M*QCONST16(0.001f,DB_SHIFT);
+      else
+         max_background_increase = QCONST16(1.f,DB_SHIFT);
       for (i=0;i<2*nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<2*nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
-      for (i=0;i<2*nbEBands;i++)
-         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
    } else {
       for (i=0;i<2*nbEBands;i++)
          oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
    }
    c=0; do
    {
-      for (i=0;i<st->start;i++)
+      for (i=0;i<start;i++)
       {
          oldBandE[c*nbEBands+i]=0;
          oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
       }
-      for (i=st->end;i<nbEBands;i++)
+      for (i=end;i<nbEBands;i++)
       {
          oldBandE[c*nbEBands+i]=0;
          oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@@ -1011,8 +1061,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    } while (++c<2);
    st->rng = dec->rng;
 
-   /* We reuse freq[] as scratch space for the de-emphasis */
-   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
    st->loss_count = 0;
    RESTORE_STACK;
    if (ec_tell(dec) > 8*len)
@@ -1028,7 +1077,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
 #ifdef FIXED_POINT
 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
 {
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -1045,7 +1094,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
    N = frame_size;
 
    ALLOC(out, C*N, opus_int16);
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
    if (ret>0)
       for (j=0;j<C*ret;j++)
          pcm[j]=out[j]*(1.f/32768.f);
@@ -1059,7 +1108,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
 
 int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
 {
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
 }
 
 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
@@ -1075,7 +1124,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data
    N = frame_size;
    ALLOC(out, C*N, celt_sig);
 
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
 
    if (ret>0)
       for (j=0;j<C*ret;j++)
diff --git a/media/libopus/celt/celt_encoder.c b/media/libopus/celt/celt_encoder.c
index ffff0775df..41fbfd49c8 100644
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -57,7 +57,6 @@
  */
 struct OpusCustomEncoder {
    const OpusCustomMode *mode;     /**< Mode used by the encoder */
-   int overlap;
    int channels;
    int stream_channels;
 
@@ -173,7 +172,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
    OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
 
    st->mode = mode;
-   st->overlap = mode->overlap;
    st->stream_channels = st->channels = channels;
 
    st->upsample = 1;
@@ -276,8 +274,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
       }
       /*printf("\n");*/
       /* First few samples are bad because we don't propagate the memory */
-      for (i=0;i<12;i++)
-         tmp[i] = 0;
+      OPUS_CLEAR(tmp, 12);
 
 #ifdef FIXED_POINT
       /* Normalize tmp to max range */
@@ -346,9 +343,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
       {
          int id;
 #ifdef FIXED_POINT
-         id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
+         id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */
 #else
-         id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */
+         id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */
 #endif
          unmask += inv_table[id];
       }
@@ -366,7 +363,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
    /* Arbitrary metric for VBR boost */
    tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
    /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
-   *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
+   *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
    /*printf("%d %f\n", tf_max, mask_metric);*/
    RESTORE_STACK;
 #ifdef FUZZING
@@ -378,8 +375,8 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
 
 /* Looks for sudden increases of energy to decide whether we need to patch
    the transient decision */
-int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
-      int end, int C)
+static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
+      int start, int end, int C)
 {
    int i, c;
    opus_val32 mean_diff=0;
@@ -388,28 +385,28 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
       avoid false detection caused by irrelevant bands */
    if (C==1)
    {
-      spread_old[0] = oldE[0];
-      for (i=1;i<end;i++)
+      spread_old[start] = oldE[start];
+      for (i=start+1;i<end;i++)
          spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
    } else {
-      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
-      for (i=1;i<end;i++)
+      spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]);
+      for (i=start+1;i<end;i++)
          spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
                                MAX16(oldE[i],oldE[i+nbEBands]));
    }
-   for (i=end-2;i>=0;i--)
+   for (i=end-2;i>=start;i--)
       spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
    /* Compute mean increase */
    c=0; do {
-      for (i=2;i<end-1;i++)
+      for (i=IMAX(2,start);i<end-1;i++)
       {
          opus_val16 x1, x2;
-         x1 = MAX16(0, newE[i]);
+         x1 = MAX16(0, newE[i + c*nbEBands]);
          x2 = MAX16(0, spread_old[i]);
          mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
       }
    } while (++c<C);
-   mean_diff = DIV32(mean_diff, C*(end-3));
+   mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start)));
    /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
    return mean_diff > QCONST16(1.f, DB_SHIFT);
 }
@@ -417,9 +414,10 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
 /** Apply window and compute the MDCT for all sub-frames and
     all channels in a frame */
 static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
-                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
+                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
+                          int arch)
 {
-   const int overlap = OVERLAP(mode);
+   const int overlap = mode->overlap;
    int N;
    int B;
    int shift;
@@ -438,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
       for (b=0;b<B;b++)
       {
          /* Interleaving the sub-frames while doing the MDCTs */
-         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
+         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
+                          &out[b+c*N*B], mode->window, overlap, shift, B,
+                          arch);
       }
    } while (++c<CC);
    if (CC==2&&C==1)
@@ -453,8 +453,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
          int bound = B*N/upsample;
          for (i=0;i<bound;i++)
             out[c*B*N+i] *= upsample;
-         for (;i<B*N;i++)
-            out[c*B*N+i] = 0;
+         OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
       } while (++c<C);
    }
 }
@@ -469,26 +468,30 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
    int Nu;
 
    coef0 = coef[0];
+   m = *mem;
 
+   /* Fast path for the normal 48kHz case and no clipping */
+   if (coef[1] == 0 && upsample == 1 && !clip)
+   {
+      for (i=0;i<N;i++)
+      {
+         opus_val16 x;
+         x = SCALEIN(pcmp[CC*i]);
+         /* Apply pre-emphasis */
+         inp[i] = SHL32(x, SIG_SHIFT) - m;
+         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
+      }
+      *mem = m;
+      return;
+   }
 
    Nu = N/upsample;
    if (upsample!=1)
    {
-      for (i=0;i<N;i++)
-         inp[i] = 0;
+      OPUS_CLEAR(inp, N);
    }
    for (i=0;i<Nu;i++)
-   {
-      celt_sig x;
-
-      x = SCALEIN(pcmp[CC*i]);
-#ifndef FIXED_POINT
-      /* Replace NaNs with zeros */
-      if (!(x==x))
-         x = 0;
-#endif
-      inp[i*upsample] = x;
-   }
+      inp[i*upsample] = SCALEIN(pcmp[CC*i]);
 
 #ifndef FIXED_POINT
    if (clip)
@@ -500,7 +503,6 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
 #else
    (void)clip; /* Avoids a warning about clip being unused. */
 #endif
-   m = *mem;
 #ifdef CUSTOM_MODES
    if (coef[1] != 0)
    {
@@ -520,11 +522,11 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
    {
       for (i=0;i<N;i++)
       {
-         celt_sig x;
-         x = SHL32(inp[i], SIG_SHIFT);
+         opus_val16 x;
+         x = inp[i];
          /* Apply pre-emphasis */
-         inp[i] = x + m;
-         m = - MULT16_32_Q15(coef0, x);
+         inp[i] = SHL32(x, SIG_SHIFT) - m;
+         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
       }
    }
    *mem = m;
@@ -575,15 +577,14 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
    *tf_sum = 0;
    for (i=0;i<len;i++)
    {
-      int j, k, N;
+      int k, N;
       int narrow;
       opus_val32 L1, best_L1;
       int best_level=0;
       N = (m->eBands[i+1]-m->eBands[i])<<LM;
       /* band is too narrow to be split down to LM=-1 */
       narrow = (m->eBands[i+1]-m->eBands[i])==1;
-      for (j=0;j<N;j++)
-         tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+      OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
       /* Just add the right channel if we're in stereo */
       /*if (C==2)
          for (j=0;j<N;j++)
@@ -593,8 +594,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
       /* Check the -1 case for transients */
       if (isTransient && !narrow)
       {
-         for (j=0;j<N;j++)
-            tmp_1[j] = tmp[j];
+         OPUS_COPY(tmp_1, tmp, N);
          haar1(tmp_1, N>>LM, 1<<LM);
          L1 = l1_metric(tmp_1, N, LM+1, bias);
          if (L1<best_L1)
@@ -754,12 +754,12 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       const opus_val16 *bandLogE, int end, int LM, int C, int N0,
       AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
-      int intensity, opus_val16 surround_trim)
+      int intensity, opus_val16 surround_trim, int arch)
 {
    int i;
    opus_val32 diff=0;
    int c;
-   int trim_index = 5;
+   int trim_index;
    opus_val16 trim = QCONST16(5.f, 8);
    opus_val16 logXC, logXC2;
    if (C==2)
@@ -769,10 +769,9 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       /* Compute inter-channel correlation for low frequencies */
       for (i=0;i<8;i++)
       {
-         int j;
-         opus_val32 partial = 0;
-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-            partial = MAC16_16(partial, X[j], X[N0+j]);
+         opus_val32 partial;
+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
+               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
          sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
       }
       sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
@@ -780,22 +779,13 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       minXC = sum;
       for (i=8;i<intensity;i++)
       {
-         int j;
-         opus_val32 partial = 0;
-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-            partial = MAC16_16(partial, X[j], X[N0+j]);
+         opus_val32 partial;
+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
+               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
          minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
       }
       minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
       /*printf ("%f\n", sum);*/
-      if (sum > QCONST16(.995f,10))
-         trim_index-=4;
-      else if (sum > QCONST16(.92f,10))
-         trim_index-=3;
-      else if (sum > QCONST16(.85f,10))
-         trim_index-=2;
-      else if (sum > QCONST16(.8f,10))
-         trim_index-=1;
       /* mid-side savings estimations based on the LF average*/
       logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
       /* mid-side savings estimations based on min correlation */
@@ -819,14 +809,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
    } while (++c<C);
    diff /= C*(end-1);
    /*printf("%f\n", diff);*/
-   if (diff > QCONST16(2.f, DB_SHIFT))
-      trim_index--;
-   if (diff > QCONST16(8.f, DB_SHIFT))
-      trim_index--;
-   if (diff < -QCONST16(4.f, DB_SHIFT))
-      trim_index++;
-   if (diff < -QCONST16(10.f, DB_SHIFT))
-      trim_index++;
    trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
    trim -= SHR16(surround_trim, DB_SHIFT-8);
    trim -= 2*SHR16(tf_estimate, 14-8);
@@ -836,6 +818,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
             (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
    }
+#else
+   (void)analysis;
 #endif
 
 #ifdef FIXED_POINT
@@ -843,10 +827,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
 #else
    trim_index = (int)floor(.5f+trim);
 #endif
-   if (trim_index<0)
-      trim_index = 0;
-   if (trim_index>10)
-      trim_index = 10;
+   trim_index = IMAX(0, IMIN(10, trim_index));
    /*printf("%d\n", trim_index);*/
 #ifdef FUZZING
    trim_index = rand()%11;
@@ -886,6 +867,66 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X,
          > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
 }
 
+#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0)
+static opus_val16 median_of_5(const opus_val16 *x)
+{
+   opus_val16 t0, t1, t2, t3, t4;
+   t2 = x[2];
+   if (x[0] > x[1])
+   {
+      t0 = x[1];
+      t1 = x[0];
+   } else {
+      t0 = x[0];
+      t1 = x[1];
+   }
+   if (x[3] > x[4])
+   {
+      t3 = x[4];
+      t4 = x[3];
+   } else {
+      t3 = x[3];
+      t4 = x[4];
+   }
+   if (t0 > t3)
+   {
+      MSWAP(t0, t3);
+      MSWAP(t1, t4);
+   }
+   if (t2 > t1)
+   {
+      if (t1 < t3)
+         return MIN16(t2, t3);
+      else
+         return MIN16(t4, t1);
+   } else {
+      if (t2 < t3)
+         return MIN16(t1, t3);
+      else
+         return MIN16(t2, t4);
+   }
+}
+
+static opus_val16 median_of_3(const opus_val16 *x)
+{
+   opus_val16 t0, t1, t2;
+   if (x[0] > x[1])
+   {
+      t0 = x[1];
+      t1 = x[0];
+   } else {
+      t0 = x[0];
+      t1 = x[1];
+   }
+   t2 = x[2];
+   if (t1 < t2)
+      return t1;
+   else if (t0 < t2)
+      return t2;
+   else
+      return t0;
+}
+
 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
       int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
       int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
@@ -899,8 +940,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
    SAVE_STACK;
    ALLOC(follower, C*nbEBands, opus_val16);
    ALLOC(noise_floor, C*nbEBands, opus_val16);
-   for (i=0;i<nbEBands;i++)
-      offsets[i] = 0;
+   OPUS_CLEAR(offsets, nbEBands);
    /* Dynamic allocation code */
    maxDepth=-QCONST16(31.9f, DB_SHIFT);
    for (i=0;i<end;i++)
@@ -922,7 +962,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
       int last=0;
       c=0;do
       {
-         follower[c*nbEBands] = bandLogE2[c*nbEBands];
+         opus_val16 offset;
+         opus_val16 tmp;
+         opus_val16 *f;
+         f = &follower[c*nbEBands];
+         f[0] = bandLogE2[c*nbEBands];
          for (i=1;i<end;i++)
          {
             /* The last band to be at least 3 dB higher than the previous one
@@ -930,12 +974,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
                bandlimited signals. */
             if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
                last=i;
-            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+            f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
          }
          for (i=last-1;i>=0;i--)
-            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+            f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+
+         /* Combine with a median filter to avoid dynalloc triggering unnecessarily.
+            The "offset" value controls how conservative we are -- a higher offset
+            reduces the impact of the median filter and makes dynalloc use more bits. */
+         offset = QCONST16(1.f, DB_SHIFT);
+         for (i=2;i<end-2;i++)
+            f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset);
+         tmp = median_of_3(&bandLogE2[c*nbEBands])-offset;
+         f[0] = MAX16(f[0], tmp);
+         f[1] = MAX16(f[1], tmp);
+         tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset;
+         f[end-2] = MAX16(f[end-2], tmp);
+         f[end-1] = MAX16(f[end-1], tmp);
+
          for (i=0;i<end;i++)
-            follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
+            f[i] = MAX16(f[i], noise_floor[i]);
       } while (++c<C);
       if (C==2)
       {
@@ -1016,9 +1074,11 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
    opus_val16 pf_threshold;
    int pf_on;
    int qg;
+   int overlap;
    SAVE_STACK;
 
    mode = st->mode;
+   overlap = mode->overlap;
    ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
 
    pre[0] = _pre;
@@ -1027,7 +1087,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
 
    c=0; do {
       OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
-      OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
+      OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N);
    } while (++c<CC);
 
    if (enabled)
@@ -1044,7 +1104,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
       pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
       gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
-            N, &pitch_index, st->prefilter_period, st->prefilter_gain);
+            N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch);
       if (pitch_index > COMBFILTER_MAXPERIOD-2)
          pitch_index = COMBFILTER_MAXPERIOD-2;
       gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
@@ -1100,18 +1160,18 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
    /*printf("%d %f\n", pitch_index, gain1);*/
 
    c=0; do {
-      int offset = mode->shortMdctSize-st->overlap;
+      int offset = mode->shortMdctSize-overlap;
       st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
-      OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
+      OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap);
       if (offset)
-         comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
+         comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD,
                st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
-               st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
+               st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch);
 
-      comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
+      comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
             st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
-            st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
-      OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
+            st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
+      OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
 
       if (N>COMBFILTER_MAXPERIOD)
       {
@@ -1196,6 +1256,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
       /*printf("%f %f ", analysis->tonality, tonal);*/
       target = tonal_target;
    }
+#else
+   (void)analysis;
+   (void)pitch_change;
 #endif
 
    if (has_surround_mask&&!lfe)
@@ -1273,6 +1336,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    int LM, M;
    int tf_select;
    int nbFilledBytes, nbAvailableBytes;
+   int start;
+   int end;
    int effEnd;
    int codedBands;
    int tf_sum;
@@ -1316,6 +1381,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
+   start = st->start;
+   end = st->end;
    tf_estimate = 0;
    if (nbCompressedBytes<2 || pcm==NULL)
    {
@@ -1335,8 +1402,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    M=1<<LM;
    N = M*mode->shortMdctSize;
 
-   prefilter_mem = st->in_mem+CC*(st->overlap);
-   oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
+   prefilter_mem = st->in_mem+CC*(overlap);
+   oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
    oldLogE = oldBandE + CC*nbEBands;
    oldLogE2 = oldLogE + CC*nbEBands;
 
@@ -1352,8 +1419,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 #ifdef CUSTOM_MODES
    if (st->signalling && enc==NULL)
    {
-      int tmp = (mode->effEBands-st->end)>>1;
-      st->end = IMAX(1, mode->effEBands-tmp);
+      int tmp = (mode->effEBands-end)>>1;
+      end = st->end = IMAX(1, mode->effEBands-tmp);
       compressed[0] = tmp<<5;
       compressed[0] |= LM<<3;
       compressed[0] |= (C==2)<<2;
@@ -1436,11 +1503,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    }
    total_bits = nbCompressedBytes*8;
 
-   effEnd = st->end;
+   effEnd = end;
    if (effEnd > mode->effEBands)
       effEnd = mode->effEBands;
 
-   ALLOC(in, CC*(N+st->overlap), celt_sig);
+   ALLOC(in, CC*(N+overlap), celt_sig);
 
    sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
    st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
@@ -1474,8 +1541,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       enc->nbits_total+=tell-ec_tell(enc);
    }
    c=0; do {
-      celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
-                  mode->preemph, st->preemph_memE+c, st->clip);
+      int need_clip=0;
+#ifndef FIXED_POINT
+      need_clip = st->clip && sample_max>65536.f;
+#endif
+      celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample,
+                  mode->preemph, st->preemph_memE+c, need_clip);
    } while (++c<CC);
 
 
@@ -1484,7 +1555,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    {
       int enabled;
       int qg;
-      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf
             && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
 
       prefilter_tapset = st->tapset_decision;
@@ -1494,7 +1565,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          pitch_change = 1;
       if (pf_on==0)
       {
-         if(st->start==0 && tell+16<=total_bits)
+         if(start==0 && tell+16<=total_bits)
             ec_enc_bit_logp(enc, 0, 1);
       } else {
          /*This block is not gated by a total bits check only because
@@ -1515,7 +1586,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    shortBlocks = 0;
    if (st->complexity >= 1 && !st->lfe)
    {
-      isTransient = transient_analysis(in, N+st->overlap, CC,
+      isTransient = transient_analysis(in, N+overlap, CC,
             &tf_estimate, &tf_chan);
    }
    if (LM>0 && ec_tell(enc)+3<=total_bits)
@@ -1535,33 +1606,32 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    ALLOC(bandLogE2, C*nbEBands, opus_val16);
    if (secondMdct)
    {
-      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
-      compute_band_energies(mode, freq, bandE, effEnd, C, M);
-      amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
+      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
+      compute_band_energies(mode, freq, bandE, effEnd, C, LM);
+      amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
       for (i=0;i<C*nbEBands;i++)
          bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
    }
 
-   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
    if (CC==2&&C==1)
       tf_chan = 0;
-   compute_band_energies(mode, freq, bandE, effEnd, C, M);
+   compute_band_energies(mode, freq, bandE, effEnd, C, LM);
 
    if (st->lfe)
    {
-      for (i=2;i<st->end;i++)
+      for (i=2;i<end;i++)
       {
          bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
          bandE[i] = MAX32(bandE[i], EPSILON);
       }
    }
-   amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+   amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
 
    ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
-   for(i=0;i<st->end;i++)
-      surround_dynalloc[i] = 0;
+   OPUS_CLEAR(surround_dynalloc, end);
    /* This computes how much masking takes place between surround channels */
-   if (st->start==0&&st->energy_mask&&!st->lfe)
+   if (start==0&&st->energy_mask&&!st->lfe)
    {
       int mask_end;
       int midband;
@@ -1584,6 +1654,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
             diff += MULT16_16(mask, 1+2*i-mask_end);
          }
       }
+      celt_assert(count>0);
       mask_avg = DIV32_16(mask_avg,count);
       mask_avg += QCONST16(.2f, DB_SHIFT);
       diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
@@ -1621,8 +1692,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
                disabling masking. */
             mask_avg = 0;
             diff = 0;
-            for(i=0;i<mask_end;i++)
-               surround_dynalloc[i] = 0;
+            OPUS_CLEAR(surround_dynalloc, mask_end);
          } else {
             for(i=0;i<mask_end;i++)
                surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
@@ -1640,14 +1710,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
       opus_val32 frame_avg=0;
       opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
-      for(i=st->start;i<st->end;i++)
+      for(i=start;i<end;i++)
       {
          follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
          if (C==2)
             follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
          frame_avg += follow;
       }
-      frame_avg /= (st->end-st->start);
+      frame_avg /= (end-start);
       temporal_vbr = SUB16(frame_avg,st->spec_avg);
       temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
       st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
@@ -1658,21 +1728,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    if (!secondMdct)
    {
-      for (i=0;i<C*nbEBands;i++)
-         bandLogE2[i] = bandLogE[i];
+      OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
    }
 
    /* Last chance to catch any transient we might have missed in the
       time-domain analysis */
    if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
    {
-      if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C))
+      if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
       {
          isTransient = 1;
          shortBlocks = M;
-         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
-         compute_band_energies(mode, freq, bandE, effEnd, C, M);
-         amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
+         compute_band_energies(mode, freq, bandE, effEnd, C, LM);
+         amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
          /* Compensate for the scaling of short vs long mdcts */
          for (i=0;i<C*nbEBands;i++)
             bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
@@ -1690,7 +1759,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    ALLOC(tf_res, nbEBands, int);
    /* Disable variable tf resolution for hybrid and at very low bitrate */
-   if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe)
+   if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe)
    {
       int lambda;
       if (effectiveBytes<40)
@@ -1703,22 +1772,22 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          lambda = 3;
       lambda*=2;
       tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
-      for (i=effEnd;i<st->end;i++)
+      for (i=effEnd;i<end;i++)
          tf_res[i] = tf_res[effEnd-1];
    } else {
       tf_sum = 0;
-      for (i=0;i<st->end;i++)
+      for (i=0;i<end;i++)
          tf_res[i] = isTransient;
       tf_select=0;
    }
 
    ALLOC(error, C*nbEBands, opus_val16);
-   quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
+   quant_coarse_energy(mode, start, end, effEnd, bandLogE,
          oldBandE, total_bits, error, enc,
          C, LM, nbAvailableBytes, st->force_intra,
          &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
 
-   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
+   tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc);
 
    if (ec_tell(enc)+4<=total_bits)
    {
@@ -1726,7 +1795,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       {
          st->tapset_decision = 0;
          st->spread_decision = SPREAD_NORMAL;
-      } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
+      } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0)
       {
          if (st->complexity == 0)
             st->spread_decision = SPREAD_NONE;
@@ -1760,7 +1829,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    ALLOC(offsets, nbEBands, int);
 
-   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
+   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
          st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
          eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
    /* For LFE, everything interesting is in the first band */
@@ -1773,7 +1842,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    total_bits<<=BITRES;
    total_boost = 0;
    tell = ec_tell_frac(enc);
-   for (i=st->start;i<st->end;i++)
+   for (i=start;i<end;i++)
    {
       int width, quanta;
       int dynalloc_loop_logp;
@@ -1818,7 +1887,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
       st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
             intensity_thresholds, intensity_histeresis, 21, st->intensity);
-      st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+      st->intensity = IMIN(end,IMAX(start, st->intensity));
    }
 
    alloc_trim = 5;
@@ -1828,7 +1897,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          alloc_trim = 5;
       else
          alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
-            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
+            end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
+            st->intensity, surround_trim, st->arch);
       ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
       tell = ec_tell_frac(enc);
    }
@@ -1930,7 +2000,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
    bits -= anti_collapse_rsv;
-   signalBandwidth = st->end-1;
+   signalBandwidth = end-1;
 #ifndef DISABLE_FLOAT_API
    if (st->analysis.valid)
    {
@@ -1950,7 +2020,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 #endif
    if (st->lfe)
       signalBandwidth = 1;
-   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+   codedBands = compute_allocation(mode, start, end, offsets, cap,
          alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
          fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
    if (st->lastCodedBands)
@@ -1958,13 +2028,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    else
       st->lastCodedBands = codedBands;
 
-   quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
+   quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C);
 
    /* Residual quantisation */
    ALLOC(collapse_masks, C*nbEBands, unsigned char);
-   quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
-         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
+   quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
+         bandE, pulses, shortBlocks, st->spread_decision,
+         dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
+         balance, enc, LM, codedBands, &st->rng, st->arch);
 
    if (anti_collapse_rsv > 0)
    {
@@ -1974,7 +2045,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 #endif
       ec_enc_bits(enc, anti_collapse_on, 1);
    }
-   quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
+   quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
 
    if (silence)
    {
@@ -1990,40 +2061,26 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       if (anti_collapse_on)
       {
          anti_collapse(mode, X, collapse_masks, LM, C, N,
-               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-      }
-
-      if (silence)
-      {
-         for (i=0;i<C*N;i++)
-            freq[i] = 0;
-      } else {
-         /* Synthesis */
-         denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+               start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
       }
 
       c=0; do {
          OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
       } while (++c<CC);
 
-      if (CC==2&&C==1)
-      {
-         for (i=0;i<N;i++)
-            freq[N+i] = freq[i];
-      }
-
       c=0; do {
          out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
       } while (++c<CC);
 
-      compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
+      celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd,
+                     C, CC, isTransient, LM, st->upsample, silence, st->arch);
 
       c=0; do {
          st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
          st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
          comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
                st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
-               mode->window, st->overlap);
+               mode->window, overlap);
          if (LM!=0)
             comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
                   st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
@@ -2031,7 +2088,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       } while (++c<CC);
 
       /* We reuse freq[] as scratch space for the de-emphasis */
-      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
+      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
       st->prefilter_period_old = st->prefilter_period;
       st->prefilter_gain_old = st->prefilter_gain;
       st->prefilter_tapset_old = st->prefilter_tapset;
@@ -2051,16 +2108,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 #endif
 
    if (CC==2&&C==1) {
-      for (i=0;i<nbEBands;i++)
-         oldBandE[nbEBands+i]=oldBandE[i];
+      OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
    }
 
    if (!isTransient)
    {
-      for (i=0;i<CC*nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<CC*nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
+      OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
+      OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
    } else {
       for (i=0;i<CC*nbEBands;i++)
          oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
@@ -2068,12 +2122,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    /* In case start or end were to change */
    c=0; do
    {
-      for (i=0;i<st->start;i++)
+      for (i=0;i<start;i++)
       {
          oldBandE[c*nbEBands+i]=0;
          oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
       }
-      for (i=st->end;i<nbEBands;i++)
+      for (i=end;i<nbEBands;i++)
       {
          oldBandE[c*nbEBands+i]=0;
          oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@@ -2274,7 +2328,7 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
       {
          int i;
          opus_val16 *oldBandE, *oldLogE, *oldLogE2;
-         oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD));
+         oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD));
          oldLogE = oldBandE + st->channels*st->mode->nbEBands;
          oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
          OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
diff --git a/media/libopus/celt/celt_lpc.c b/media/libopus/celt/celt_lpc.c
index fa29d626ea..f02145af0d 100644
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -88,12 +88,15 @@ int          p
 #endif
 }
 
-void celt_fir(const opus_val16 *_x,
+
+void celt_fir_c(
+         const opus_val16 *_x,
          const opus_val16 *num,
          opus_val16 *_y,
          int N,
          int ord,
-         opus_val16 *mem)
+         opus_val16 *mem,
+         int arch)
 {
    int i,j;
    VARDECL(opus_val16, rnum);
@@ -111,6 +114,7 @@ void celt_fir(const opus_val16 *_x,
    for(i=0;i<ord;i++)
       mem[i] = _x[N-i-1];
 #ifdef SMALL_FOOTPRINT
+   (void)arch;
    for (i=0;i<N;i++)
    {
       opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
@@ -124,7 +128,7 @@ void celt_fir(const opus_val16 *_x,
    for (i=0;i<N-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
-      xcorr_kernel(rnum, x+i, sum, ord);
+      xcorr_kernel(rnum, x+i, sum, ord, arch);
       _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
       _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
       _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
@@ -146,10 +150,12 @@ void celt_iir(const opus_val32 *_x,
          opus_val32 *_y,
          int N,
          int ord,
-         opus_val16 *mem)
+         opus_val16 *mem,
+         int arch)
 {
 #ifdef SMALL_FOOTPRINT
    int i,j;
+   (void)arch;
    for (i=0;i<N;i++)
    {
       opus_val32 sum = _x[i];
@@ -187,7 +193,7 @@ void celt_iir(const opus_val32 *_x,
       sum[1]=_x[i+1];
       sum[2]=_x[i+2];
       sum[3]=_x[i+3];
-      xcorr_kernel(rden, y+i, sum, ord);
+      xcorr_kernel(rden, y+i, sum, ord, arch);
 
       /* Patch up the result to compensate for the fact that this is an IIR */
       y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
diff --git a/media/libopus/celt/celt_lpc.h b/media/libopus/celt/celt_lpc.h
index dc2a0a3d26..323459eb1a 100644
--- a/media/libopus/celt/celt_lpc.h
+++ b/media/libopus/celt/celt_lpc.h
@@ -29,24 +29,37 @@
 #define PLC_H
 
 #include "arch.h"
+#include "cpu_support.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#include "x86/celt_lpc_sse.h"
+#endif
 
 #define LPC_ORDER 24
 
 void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
 
-void celt_fir(const opus_val16 *x,
+void celt_fir_c(
+         const opus_val16 *x,
          const opus_val16 *num,
          opus_val16 *y,
          int N,
          int ord,
-         opus_val16 *mem);
+         opus_val16 *mem,
+         int arch);
+
+#if !defined(OVERRIDE_CELT_FIR)
+#define celt_fir(x, num, y, N, ord, mem, arch) \
+    (celt_fir_c(x, num, y, N, ord, mem, arch))
+#endif
 
 void celt_iir(const opus_val32 *x,
          const opus_val16 *den,
          opus_val32 *y,
          int N,
          int ord,
-         opus_val16 *mem);
+         opus_val16 *mem,
+         int arch);
 
 int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
          const opus_val16 *window, int overlap, int lag, int n, int arch);
diff --git a/media/libopus/celt/cpu_support.h b/media/libopus/celt/cpu_support.h
index d68dbe62c5..68fc60678f 100644
--- a/media/libopus/celt/cpu_support.h
+++ b/media/libopus/celt/cpu_support.h
@@ -31,7 +31,8 @@
 #include "opus_types.h"
 #include "opus_defines.h"
 
-#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
+#if defined(OPUS_HAVE_RTCD) && \
+  (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 #include "arm/armcpu.h"
 
 /* We currently support 4 ARM variants:
@@ -42,6 +43,22 @@
  */
 #define OPUS_ARCHMASK 3
 
+#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+
+#include "x86/x86cpu.h"
+/* We currently support 5 x86 variants:
+ * arch[0] -> non-sse
+ * arch[1] -> sse
+ * arch[2] -> sse2
+ * arch[3] -> sse4.1
+ * arch[4] -> avx
+ */
+#define OPUS_ARCHMASK 7
+int opus_select_arch(void);
+
 #else
 #define OPUS_ARCHMASK 0
 
@@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void)
   return 0;
 }
 #endif
-
 #endif
diff --git a/media/libopus/celt/cwrs.c b/media/libopus/celt/cwrs.c
index ad980cc7d8..2fa9f89cd6 100644
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
   ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
 }
 
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
   opus_uint32 p;
   int         s;
   int         k0;
+  opus_int16  val;
+  opus_val32  yy=0;
   celt_assert(_k>0);
   celt_assert(_n>1);
   while(_n>2){
@@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
       }
       else for(p=row[_k];p>_i;p=row[_k])_k--;
       _i-=p;
-      *_y++=(k0-_k+s)^s;
+      val=(k0-_k+s)^s;
+      *_y++=val;
+      yy=MAC16_16(yy,val,val);
     }
     /*Lots of dimensions case:*/
     else{
@@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
         do p=CELT_PVQ_U_ROW[--_k][_n];
         while(p>_i);
         _i-=p;
-        *_y++=(k0-_k+s)^s;
+        val=(k0-_k+s)^s;
+        *_y++=val;
+        yy=MAC16_16(yy,val,val);
       }
     }
     _n--;
@@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
   k0=_k;
   _k=(_i+1)>>1;
   if(_k)_i-=2*_k-1;
-  *_y++=(k0-_k+s)^s;
+  val=(k0-_k+s)^s;
+  *_y++=val;
+  yy=MAC16_16(yy,val,val);
   /*_n==1*/
   s=-(int)_i;
-  *_y=(_k+s)^s;
+  val=(_k+s)^s;
+  *_y=val;
+  yy=MAC16_16(yy,val,val);
+  return yy;
 }
 
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
 }
 
 #else /* SMALL_FOOTPRINT */
@@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
   _y: Returns the vector of pulses.
   _u: Must contain entries [0..._k+1] of row _n of U() on input.
       Its contents will be destructively modified.*/
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
   int j;
+  opus_int16 val;
+  opus_val32 yy=0;
   celt_assert(_n>0);
   j=0;
   do{
@@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
     while(p>_i)p=_u[--_k];
     _i-=p;
     yj-=_k;
-    _y[j]=(yj+s)^s;
+    val=(yj+s)^s;
+    _y[j]=val;
+    yy=MAC16_16(yy,val,val);
     uprev(_u,_k+2,0);
   }
   while(++j<_n);
+  return yy;
 }
 
 /*Returns the index of the given combination of K elements chosen from a set
@@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
   RESTORE_STACK;
 }
 
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
   VARDECL(opus_uint32,u);
+  int ret;
   SAVE_STACK;
   celt_assert(_k>0);
   ALLOC(u,_k+2U,opus_uint32);
-  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+  ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
   RESTORE_STACK;
+  return ret;
 }
 
 #endif /* SMALL_FOOTPRINT */
diff --git a/media/libopus/celt/cwrs.h b/media/libopus/celt/cwrs.h
index 7dfbd076d1..7cd4717459 100644
--- a/media/libopus/celt/cwrs.h
+++ b/media/libopus/celt/cwrs.h
@@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac);
 
 void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
 
-void decode_pulses(int *_y, int N, int K, ec_dec *dec);
+opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
 
 #endif /* CWRS_H */
diff --git a/media/libopus/celt/entcode.c b/media/libopus/celt/entcode.c
index fa5d7c7c2c..70f32016ec 100644
--- a/media/libopus/celt/entcode.c
+++ b/media/libopus/celt/entcode.c
@@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){
 }
 #endif
 
+#if 1
+/* This is a faster version of ec_tell_frac() that takes advantage
+   of the low (1/8 bit) resolution to use just a linear function
+   followed by a lookup to determine the exact transition thresholds. */
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+  static const unsigned correction[8] =
+    {35733, 38967, 42495, 46340,
+     50535, 55109, 60097, 65535};
+  opus_uint32 nbits;
+  opus_uint32 r;
+  int         l;
+  unsigned    b;
+  nbits=_this->nbits_total<<BITRES;
+  l=EC_ILOG(_this->rng);
+  r=_this->rng>>(l-16);
+  b = (r>>12)-8;
+  b += r>correction[b];
+  l = (l<<3)+b;
+  return nbits-l;
+}
+#else
 opus_uint32 ec_tell_frac(ec_ctx *_this){
   opus_uint32 nbits;
   opus_uint32 r;
@@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
   }
   return nbits-l;
 }
+#endif
+
+#ifdef USE_SMALL_DIV_TABLE
+/* Result of 2^32/(2*i+1), except for i=0. */
+const opus_uint32 SMALL_DIV_TABLE[129] = {
+   0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
+   0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
+   0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
+   0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
+   0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
+   0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
+   0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
+   0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
+   0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
+   0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
+   0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
+   0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
+   0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
+   0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
+   0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
+   0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
+   0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
+   0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
+   0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
+   0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
+   0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
+   0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
+   0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
+   0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
+   0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
+   0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
+   0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
+   0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
+   0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
+   0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
+   0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
+   0x01073260, 0x0105197F, 0x0103091B, 0x01010101
+};
+#endif
diff --git a/media/libopus/celt/entcode.h b/media/libopus/celt/entcode.h
index dd13e49e50..13d6c84ef0 100644
--- a/media/libopus/celt/entcode.h
+++ b/media/libopus/celt/entcode.h
@@ -34,6 +34,12 @@
 # include <stddef.h>
 # include "ecintrin.h"
 
+extern const opus_uint32 SMALL_DIV_TABLE[129];
+
+#ifdef OPUS_ARM_ASM
+#define USE_SMALL_DIV_TABLE
+#endif
+
 /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
    larger type, you can speed up the decoder by using it here.*/
 typedef opus_uint32           ec_window;
@@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
            rounding error is in the positive direction).*/
 opus_uint32 ec_tell_frac(ec_ctx *_this);
 
+/* Tested exhaustively for all n and for 1<=d<=256 */
+static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
+   celt_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+   if (d>256)
+      return n/d;
+   else {
+      opus_uint32 t, q;
+      t = EC_ILOG(d&-d);
+      q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
+      return q+(n-q*d >= d);
+   }
+#else
+   return n/d;
+#endif
+}
+
+static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
+   celt_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+   if (n<0)
+      return -(opus_int32)celt_udiv(-n, d);
+   else
+      return celt_udiv(n, d);
+#else
+   return n/d;
+#endif
+}
+
 #endif
diff --git a/media/libopus/celt/entdec.c b/media/libopus/celt/entdec.c
index 3c264685c2..0b3433ed8b 100644
--- a/media/libopus/celt/entdec.c
+++ b/media/libopus/celt/entdec.c
@@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
 
 unsigned ec_decode(ec_dec *_this,unsigned _ft){
   unsigned s;
-  _this->ext=_this->rng/_ft;
+  _this->ext=celt_udiv(_this->rng,_ft);
   s=(unsigned)(_this->val/_this->ext);
   return _ft-EC_MINI(s+1,_ft);
 }
diff --git a/media/libopus/celt/entenc.c b/media/libopus/celt/entenc.c
index a7e34ecef9..f1750d25b8 100644
--- a/media/libopus/celt/entenc.c
+++ b/media/libopus/celt/entenc.c
@@ -98,7 +98,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){
   else _this->ext++;
 }
 
-static void ec_enc_normalize(ec_enc *_this){
+static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){
   /*If the range is too small, output some bits and rescale it.*/
   while(_this->rng<=EC_CODE_BOT){
     ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
@@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
 
 void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
   opus_uint32 r;
-  r=_this->rng/_ft;
+  r=celt_udiv(_this->rng,_ft);
   if(_fl>0){
     _this->val+=_this->rng-IMUL32(r,(_ft-_fl));
     _this->rng=IMUL32(r,(_fh-_fl));
diff --git a/media/libopus/celt/fixed_debug.h b/media/libopus/celt/fixed_debug.h
index 80bc94910f..d28227f5dc 100644
--- a/media/libopus/celt/fixed_debug.h
+++ b/media/libopus/celt/fixed_debug.h
@@ -496,6 +496,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int
 
 #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
 #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
+#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b))))
 
 static OPUS_INLINE int SATURATE(int a, int b)
 {
@@ -767,6 +768,16 @@ static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
    return res;
 }
 
+static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
+{
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+}
+#define SIG2WORD16(x) (SIG2WORD16_generic(x))
+
+
 #undef PRINT_MIPS
 #define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);
 
diff --git a/media/libopus/celt/fixed_generic.h b/media/libopus/celt/fixed_generic.h
index ecf018a244..ac67d37ce8 100644
--- a/media/libopus/celt/fixed_generic.h
+++ b/media/libopus/celt/fixed_generic.h
@@ -113,7 +113,11 @@
 /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
     b must fit in 31 bits.
     Result fits in 32 bits. */
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add.
+    Results fits in 32 bits */
+#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)))
 
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
 #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
@@ -131,4 +135,17 @@
 /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
 #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
 
+#if defined(MIPSr1_ASM)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
+
+static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
+{
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+}
+#define SIG2WORD16(x) (SIG2WORD16_generic(x))
+
 #endif
diff --git a/media/libopus/celt/float_cast.h b/media/libopus/celt/float_cast.h
index ede6574860..ed5a39b543 100644
--- a/media/libopus/celt/float_cast.h
+++ b/media/libopus/celt/float_cast.h
@@ -90,14 +90,14 @@
 #include <math.h>
 #define float2int(x) lrint(x)
 
-#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64)
         #include <xmmintrin.h>
 
         __inline long int float2int(float value)
         {
                 return _mm_cvtss_si32(_mm_load_ss(&value));
         }
-#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
         #include <math.h>
 
         /*      Win32 doesn't seem to have these functions.
diff --git a/media/libopus/celt/kiss_fft.c b/media/libopus/celt/kiss_fft.c
index ad706c7397..4ed37d2bb7 100644
--- a/media/libopus/celt/kiss_fft.c
+++ b/media/libopus/celt/kiss_fft.c
@@ -47,64 +47,56 @@
 
 static void kf_bfly2(
                      kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
                      int m,
-                     int N,
-                     int mm
+                     int N
                     )
 {
    kiss_fft_cpx * Fout2;
-   const kiss_twiddle_cpx * tw1;
-   int i,j;
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
+   int i;
+   (void)m;
+#ifdef CUSTOM_MODES
+   if (m==1)
    {
-      Fout = Fout_beg + i*mm;
-      Fout2 = Fout + m;
-      tw1 = st->twiddles;
-      for(j=0;j<m;j++)
+      celt_assert(m==1);
+      for (i=0;i<N;i++)
       {
          kiss_fft_cpx t;
-         Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
-         Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
-         C_MUL (t,  *Fout2 , *tw1);
-         tw1 += fstride;
+         Fout2 = Fout + 1;
+         t = *Fout2;
          C_SUB( *Fout2 ,  *Fout , t );
          C_ADDTO( *Fout ,  t );
-         ++Fout2;
-         ++Fout;
+         Fout += 2;
       }
-   }
-}
-
-static void ki_bfly2(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx * Fout2;
-   const kiss_twiddle_cpx * tw1;
-   kiss_fft_cpx t;
-   int i,j;
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
+   } else
+#endif
    {
-      Fout = Fout_beg + i*mm;
-      Fout2 = Fout + m;
-      tw1 = st->twiddles;
-      for(j=0;j<m;j++)
+      opus_val16 tw;
+      tw = QCONST16(0.7071067812f, 15);
+      /* We know that m==4 here because the radix-2 is just after a radix-4 */
+      celt_assert(m==4);
+      for (i=0;i<N;i++)
       {
-         C_MULC (t,  *Fout2 , *tw1);
-         tw1 += fstride;
-         C_SUB( *Fout2 ,  *Fout , t );
-         C_ADDTO( *Fout ,  t );
-         ++Fout2;
-         ++Fout;
+         kiss_fft_cpx t;
+         Fout2 = Fout + 4;
+         t = Fout2[0];
+         C_SUB( Fout2[0] ,  Fout[0] , t );
+         C_ADDTO( Fout[0] ,  t );
+
+         t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
+         t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
+         C_SUB( Fout2[1] ,  Fout[1] , t );
+         C_ADDTO( Fout[1] ,  t );
+
+         t.r = Fout2[2].i;
+         t.i = -Fout2[2].r;
+         C_SUB( Fout2[2] ,  Fout[2] , t );
+         C_ADDTO( Fout[2] ,  t );
+
+         t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
+         t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
+         C_SUB( Fout2[3] ,  Fout[3] , t );
+         C_ADDTO( Fout[3] ,  t );
+         Fout += 8;
       }
    }
 }
@@ -118,88 +110,66 @@ static void kf_bfly4(
                      int mm
                     )
 {
-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
-   kiss_fft_cpx scratch[6];
-   const size_t m2=2*m;
-   const size_t m3=3*m;
-   int i, j;
+   int i;
 
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
+   if (m==1)
    {
-      Fout = Fout_beg + i*mm;
-      tw3 = tw2 = tw1 = st->twiddles;
-      for (j=0;j<m;j++)
+      /* Degenerate case where all the twiddles are 1. */
+      for (i=0;i<N;i++)
       {
-         C_MUL4(scratch[0],Fout[m] , *tw1 );
-         C_MUL4(scratch[1],Fout[m2] , *tw2 );
-         C_MUL4(scratch[2],Fout[m3] , *tw3 );
+         kiss_fft_cpx scratch0, scratch1;
 
-         Fout->r = PSHR32(Fout->r, 2);
-         Fout->i = PSHR32(Fout->i, 2);
-         C_SUB( scratch[5] , *Fout, scratch[1] );
-         C_ADDTO(*Fout, scratch[1]);
-         C_ADD( scratch[3] , scratch[0] , scratch[2] );
-         C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         C_SUB( Fout[m2], *Fout, scratch[3] );
-         tw1 += fstride;
-         tw2 += fstride*2;
-         tw3 += fstride*3;
-         C_ADDTO( *Fout , scratch[3] );
+         C_SUB( scratch0 , *Fout, Fout[2] );
+         C_ADDTO(*Fout, Fout[2]);
+         C_ADD( scratch1 , Fout[1] , Fout[3] );
+         C_SUB( Fout[2], *Fout, scratch1 );
+         C_ADDTO( *Fout , scratch1 );
+         C_SUB( scratch1 , Fout[1] , Fout[3] );
 
-         Fout[m].r = scratch[5].r + scratch[4].i;
-         Fout[m].i = scratch[5].i - scratch[4].r;
-         Fout[m3].r = scratch[5].r - scratch[4].i;
-         Fout[m3].i = scratch[5].i + scratch[4].r;
-         ++Fout;
+         Fout[1].r = scratch0.r + scratch1.i;
+         Fout[1].i = scratch0.i - scratch1.r;
+         Fout[3].r = scratch0.r - scratch1.i;
+         Fout[3].i = scratch0.i + scratch1.r;
+         Fout+=4;
+      }
+   } else {
+      int j;
+      kiss_fft_cpx scratch[6];
+      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+      const int m2=2*m;
+      const int m3=3*m;
+      kiss_fft_cpx * Fout_beg = Fout;
+      for (i=0;i<N;i++)
+      {
+         Fout = Fout_beg + i*mm;
+         tw3 = tw2 = tw1 = st->twiddles;
+         /* m is guaranteed to be a multiple of 4. */
+         for (j=0;j<m;j++)
+         {
+            C_MUL(scratch[0],Fout[m] , *tw1 );
+            C_MUL(scratch[1],Fout[m2] , *tw2 );
+            C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+            C_SUB( scratch[5] , *Fout, scratch[1] );
+            C_ADDTO(*Fout, scratch[1]);
+            C_ADD( scratch[3] , scratch[0] , scratch[2] );
+            C_SUB( scratch[4] , scratch[0] , scratch[2] );
+            C_SUB( Fout[m2], *Fout, scratch[3] );
+            tw1 += fstride;
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+            C_ADDTO( *Fout , scratch[3] );
+
+            Fout[m].r = scratch[5].r + scratch[4].i;
+            Fout[m].i = scratch[5].i - scratch[4].r;
+            Fout[m3].r = scratch[5].r - scratch[4].i;
+            Fout[m3].i = scratch[5].i + scratch[4].r;
+            ++Fout;
+         }
       }
    }
 }
 
-static void ki_bfly4(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
-   kiss_fft_cpx scratch[6];
-   const size_t m2=2*m;
-   const size_t m3=3*m;
-   int i, j;
-
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw3 = tw2 = tw1 = st->twiddles;
-      for (j=0;j<m;j++)
-      {
-         C_MULC(scratch[0],Fout[m] , *tw1 );
-         C_MULC(scratch[1],Fout[m2] , *tw2 );
-         C_MULC(scratch[2],Fout[m3] , *tw3 );
-
-         C_SUB( scratch[5] , *Fout, scratch[1] );
-         C_ADDTO(*Fout, scratch[1]);
-         C_ADD( scratch[3] , scratch[0] , scratch[2] );
-         C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         C_SUB( Fout[m2], *Fout, scratch[3] );
-         tw1 += fstride;
-         tw2 += fstride*2;
-         tw3 += fstride*3;
-         C_ADDTO( *Fout , scratch[3] );
-
-         Fout[m].r = scratch[5].r - scratch[4].i;
-         Fout[m].i = scratch[5].i + scratch[4].r;
-         Fout[m3].r = scratch[5].r + scratch[4].i;
-         Fout[m3].i = scratch[5].i - scratch[4].r;
-         ++Fout;
-      }
-   }
-}
 
 #ifndef RADIX_TWO_ONLY
 
@@ -220,14 +190,19 @@ static void kf_bfly3(
    kiss_twiddle_cpx epi3;
 
    kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+   epi3.r = -16384;
+   epi3.i = -28378;
+#else
    epi3 = st->twiddles[fstride*m];
+#endif
    for (i=0;i<N;i++)
    {
       Fout = Fout_beg + i*mm;
       tw1=tw2=st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
       k=m;
       do {
-         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
 
          C_MUL(scratch[1],Fout[m] , *tw1);
          C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -255,56 +230,8 @@ static void kf_bfly3(
    }
 }
 
-static void ki_bfly3(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   int i, k;
-   const size_t m2 = 2*m;
-   const kiss_twiddle_cpx *tw1,*tw2;
-   kiss_fft_cpx scratch[5];
-   kiss_twiddle_cpx epi3;
-
-   kiss_fft_cpx * Fout_beg = Fout;
-   epi3 = st->twiddles[fstride*m];
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw1=tw2=st->twiddles;
-      k=m;
-      do{
-
-         C_MULC(scratch[1],Fout[m] , *tw1);
-         C_MULC(scratch[2],Fout[m2] , *tw2);
-
-         C_ADD(scratch[3],scratch[1],scratch[2]);
-         C_SUB(scratch[0],scratch[1],scratch[2]);
-         tw1 += fstride;
-         tw2 += fstride*2;
-
-         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
-         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-
-         C_MULBYSCALAR( scratch[0] , -epi3.i );
-
-         C_ADDTO(*Fout,scratch[3]);
-
-         Fout[m2].r = Fout[m].r + scratch[0].i;
-         Fout[m2].i = Fout[m].i - scratch[0].r;
-
-         Fout[m].r -= scratch[0].i;
-         Fout[m].i += scratch[0].r;
-
-         ++Fout;
-      }while(--k);
-   }
-}
 
+#ifndef OVERRIDE_kf_bfly5
 static void kf_bfly5(
                      kiss_fft_cpx * Fout,
                      const size_t fstride,
@@ -317,13 +244,19 @@ static void kf_bfly5(
    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
    int i, u;
    kiss_fft_cpx scratch[13];
-   const kiss_twiddle_cpx * twiddles = st->twiddles;
    const kiss_twiddle_cpx *tw;
    kiss_twiddle_cpx ya,yb;
    kiss_fft_cpx * Fout_beg = Fout;
 
-   ya = twiddles[fstride*m];
-   yb = twiddles[fstride*2*m];
+#ifdef FIXED_POINT
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
    tw=st->twiddles;
 
    for (i=0;i<N;i++)
@@ -335,8 +268,8 @@ static void kf_bfly5(
       Fout3=Fout0+3*m;
       Fout4=Fout0+4*m;
 
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
       for ( u=0; u<m; ++u ) {
-         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
          scratch[0] = *Fout0;
 
          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -373,74 +306,8 @@ static void kf_bfly5(
       }
    }
 }
+#endif /* OVERRIDE_kf_bfly5 */
 
-static void ki_bfly5(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-   int i, u;
-   kiss_fft_cpx scratch[13];
-   const kiss_twiddle_cpx * twiddles = st->twiddles;
-   const kiss_twiddle_cpx *tw;
-   kiss_twiddle_cpx ya,yb;
-   kiss_fft_cpx * Fout_beg = Fout;
-
-   ya = twiddles[fstride*m];
-   yb = twiddles[fstride*2*m];
-   tw=st->twiddles;
-
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-
-      for ( u=0; u<m; ++u ) {
-         scratch[0] = *Fout0;
-
-         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
-         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
-         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
-         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-
-         C_ADD( scratch[7],scratch[1],scratch[4]);
-         C_SUB( scratch[10],scratch[1],scratch[4]);
-         C_ADD( scratch[8],scratch[2],scratch[3]);
-         C_SUB( scratch[9],scratch[2],scratch[3]);
-
-         Fout0->r += scratch[7].r + scratch[8].r;
-         Fout0->i += scratch[7].i + scratch[8].i;
-
-         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
-         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-
-         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
-         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-
-         C_SUB(*Fout1,scratch[5],scratch[6]);
-         C_ADD(*Fout4,scratch[5],scratch[6]);
-
-         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
-         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
-         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
-         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-
-         C_ADD(*Fout2,scratch[11],scratch[12]);
-         C_SUB(*Fout3,scratch[11],scratch[12]);
-
-         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-   }
-}
 
 #endif
 
@@ -488,6 +355,9 @@ static
 int kf_factor(int n,opus_int16 * facbuf)
 {
     int p=4;
+    int i;
+    int stages=0;
+    int nbak = n;
 
     /*factor out powers of 4, powers of 2, then any remaining primes */
     do {
@@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
         {
            return 0;
         }
-        *facbuf++ = p;
-        *facbuf++ = n;
+        facbuf[2*stages] = p;
+        if (p==2 && stages > 1)
+        {
+           facbuf[2*stages] = 4;
+           facbuf[2] = 2;
+        }
+        stages++;
     } while (n > 1);
+    n = nbak;
+    /* Reverse the order to get the radix 4 at the end, so we can use the
+       fast degenerate case. It turns out that reversing the order also
+       improves the noise behaviour. */
+    for (i=0;i<stages/2;i++)
+    {
+       int tmp;
+       tmp = facbuf[2*i];
+       facbuf[2*i] = facbuf[2*(stages-i-1)];
+       facbuf[2*(stages-i-1)] = tmp;
+    }
+    for (i=0;i<stages;i++)
+    {
+        n /= facbuf[2*i];
+        facbuf[2*i+1] = n;
+    }
     return 1;
 }
 
@@ -532,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
 #endif
 }
 
+int opus_fft_alloc_arch_c(kiss_fft_state *st) {
+   (void)st;
+   return 0;
+}
+
 /*
  *
  * Allocates all necessary storage space for the fft and ifft.
  * The return value is a contiguous block of memory.  As such,
  * It can be freed with free().
  * */
-kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  const kiss_fft_state *base)
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
+                                        const kiss_fft_state *base, int arch)
 {
     kiss_fft_state *st=NULL;
     size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
@@ -555,14 +452,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  co
         kiss_twiddle_cpx *twiddles;
 
         st->nfft=nfft;
-#ifndef FIXED_POINT
+#ifdef FIXED_POINT
+        st->scale_shift = celt_ilog2(st->nfft);
+        if (st->nfft == 1<<st->scale_shift)
+           st->scale = Q15ONE;
+        else
+           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
         st->scale = 1.f/nfft;
 #endif
         if (base != NULL)
         {
            st->twiddles = base->twiddles;
            st->shift = 0;
-           while (nfft<<st->shift != base->nfft && st->shift < 32)
+           while (st->shift < 32 && nfft<<st->shift != base->nfft)
               st->shift++;
            if (st->shift>=32)
               goto fail;
@@ -581,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  co
         if (st->bitrev==NULL)
             goto fail;
         compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+
+        /* Initialize architecture specific fft parameters */
+        if (opus_fft_alloc_arch(st, arch))
+            goto fail;
     }
     return st;
 fail:
-    opus_fft_free(st);
+    opus_fft_free(st, arch);
     return NULL;
 }
 
-kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
 {
-   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
+   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
 }
 
-void opus_fft_free(const kiss_fft_state *cfg)
+void opus_fft_free_arch_c(kiss_fft_state *st) {
+   (void)st;
+}
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch)
 {
    if (cfg)
    {
+      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
       opus_free((opus_int16*)cfg->bitrev);
       if (cfg->shift < 0)
          opus_free((kiss_twiddle_cpx*)cfg->twiddles);
@@ -606,7 +518,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
 
 #endif /* CUSTOM_MODES */
 
-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
 {
     int m2, m;
     int p;
@@ -618,17 +530,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
     /* st->shift can be -1 */
     shift = st->shift>0 ? st->shift : 0;
 
-    celt_assert2 (fin != fout, "In-place FFT not supported");
-    /* Bit-reverse the input */
-    for (i=0;i<st->nfft;i++)
-    {
-       fout[st->bitrev[i]] = fin[i];
-#ifndef FIXED_POINT
-       fout[st->bitrev[i]].r *= st->scale;
-       fout[st->bitrev[i]].i *= st->scale;
-#endif
-    }
-
     fstride[0] = 1;
     L=0;
     do {
@@ -647,7 +548,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
        switch (st->factors[2*i])
        {
        case 2:
-          kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          kf_bfly2(fout, m, fstride[i]);
           break;
        case 4:
           kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -665,55 +566,39 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
     }
 }
 
-void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 {
-   int m2, m;
-   int p;
-   int L;
-   int fstride[MAXFACTORS];
    int i;
-   int shift;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+   scale = st->scale;
 
-   /* st->shift can be -1 */
-   shift = st->shift>0 ? st->shift : 0;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+   {
+      kiss_fft_cpx x = fin[i];
+      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
+      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
+   }
+   opus_fft_impl(st, fout);
+}
+
+
+void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
    celt_assert2 (fin != fout, "In-place FFT not supported");
    /* Bit-reverse the input */
    for (i=0;i<st->nfft;i++)
       fout[st->bitrev[i]] = fin[i];
-
-   fstride[0] = 1;
-   L=0;
-   do {
-      p = st->factors[2*L];
-      m = st->factors[2*L+1];
-      fstride[L+1] = fstride[L]*p;
-      L++;
-   } while(m!=1);
-   m = st->factors[2*L-1];
-   for (i=L-1;i>=0;i--)
-   {
-      if (i!=0)
-         m2 = st->factors[2*i-1];
-      else
-         m2 = 1;
-      switch (st->factors[2*i])
-      {
-      case 2:
-         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 4:
-         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#ifndef RADIX_TWO_ONLY
-      case 3:
-         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 5:
-         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#endif
-      }
-      m = m2;
-   }
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+   opus_fft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
 }
-
diff --git a/media/libopus/celt/kiss_fft.h b/media/libopus/celt/kiss_fft.h
index 66332e3bb9..bffa2bfad6 100644
--- a/media/libopus/celt/kiss_fft.h
+++ b/media/libopus/celt/kiss_fft.h
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <math.h>
 #include "arch.h"
+#include "cpu_support.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -77,17 +78,28 @@ typedef struct {
  4*4*4*2
  */
 
+typedef struct arch_fft_state{
+   int is_supported;
+   void *priv;
+} arch_fft_state;
+
 typedef struct kiss_fft_state{
     int nfft;
-#ifndef FIXED_POINT
-    kiss_fft_scalar scale;
+    opus_val16 scale;
+#ifdef FIXED_POINT
+    int scale_shift;
 #endif
     int shift;
     opus_int16 factors[2*MAXFACTORS];
     const opus_int16 *bitrev;
     const kiss_twiddle_cpx *twiddles;
+    arch_fft_state *arch_fft;
 } kiss_fft_state;
 
+#if defined(HAVE_ARM_NE10)
+#include "arm/fft_arm.h"
+#endif
+
 /*typedef struct kiss_fft_state* kiss_fft_cfg;*/
 
 /**
@@ -113,9 +125,9 @@ typedef struct kiss_fft_state{
  *      buffer size in *lenmem.
  * */
 
-kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
 
-kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
 
 /**
  * opus_fft(cfg,in_out_buf)
@@ -127,10 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
  * Note that each element is complex and can be accessed like
     f[k].r and f[k].i
  * */
-void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
-void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
 
-void opus_fft_free(const kiss_fft_state *cfg);
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch);
+
+
+void opus_fft_free_arch_c(kiss_fft_state *st);
+int opus_fft_alloc_arch_c(kiss_fft_state *st);
+
+#if !defined(OVERRIDE_OPUS_FFT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+
+extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+#define opus_fft_free_arch(_st, arch) \
+         ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_fft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((void)(arch), opus_fft_alloc_arch_c(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+         ((void)(arch), opus_fft_free_arch_c(_st))
+
+#define opus_fft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
+
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
 
 #ifdef __cplusplus
 }
diff --git a/media/libopus/celt/mdct.c b/media/libopus/celt/mdct.c
index 90a214ad0e..5315ad11a3 100644
--- a/media/libopus/celt/mdct.c
+++ b/media/libopus/celt/mdct.c
@@ -53,76 +53,100 @@
 #include "mathops.h"
 #include "stack_alloc.h"
 
+#if defined(MIPSr1_ASM)
+#include "mips/mdct_mipsr1.h"
+#endif
+
+
 #ifdef CUSTOM_MODES
 
-int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
 {
    int i;
-   int N4;
    kiss_twiddle_scalar *trig;
-#if defined(FIXED_POINT)
+   int shift;
    int N2=N>>1;
-#endif
    l->n = N;
-   N4 = N>>2;
    l->maxshift = maxshift;
    for (i=0;i<=maxshift;i++)
    {
       if (i==0)
-         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
       else
-         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
 #ifndef ENABLE_TI_DSPLIB55
       if (l->kfft[i]==NULL)
          return 0;
 #endif
    }
-   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
    if (l->trig==NULL)
      return 0;
-   /* We have enough points that sine isn't necessary */
+   for (shift=0;shift<=maxshift;shift++)
+   {
+      /* We have enough points that sine isn't necessary */
 #if defined(FIXED_POINT)
-   for (i=0;i<=N4;i++)
-      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#if 1
+      for (i=0;i<N2;i++)
+         trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
 #else
-   for (i=0;i<=N4;i++)
-      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+      for (i=0;i<N2;i++)
+         trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
 #endif
+#else
+      for (i=0;i<N2;i++)
+         trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
+#endif
+      trig += N2;
+      N2 >>= 1;
+      N >>= 1;
+   }
    return 1;
 }
 
-void clt_mdct_clear(mdct_lookup *l)
+void clt_mdct_clear(mdct_lookup *l, int arch)
 {
    int i;
    for (i=0;i<=l->maxshift;i++)
-      opus_fft_free(l->kfft[i]);
+      opus_fft_free(l->kfft[i], arch);
    opus_free((kiss_twiddle_scalar*)l->trig);
 }
 
 #endif /* CUSTOM_MODES */
 
 /* Forward MDCT trashes the input array */
-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 *window, int overlap, int shift, int stride)
+#ifndef OVERRIDE_clt_mdct_forward
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride, int arch)
 {
    int i;
    int N, N2, N4;
-   kiss_twiddle_scalar sine;
    VARDECL(kiss_fft_scalar, f);
-   VARDECL(kiss_fft_scalar, f2);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
    SAVE_STACK;
+   (void)arch;
+   scale = st->scale;
+
    N = l->n;
-   N >>= shift;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
    N2 = N>>1;
    N4 = N>>2;
+
    ALLOC(f, N2, kiss_fft_scalar);
-   ALLOC(f2, N2, kiss_fft_scalar);
-   /* sin(x) ~= x here */
-#ifdef FIXED_POINT
-   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
-#else
-   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
-#endif
+   ALLOC(f2, N4, kiss_fft_cpx);
 
    /* Consider the input to be composed of four blocks: [a, b, c, d] */
    /* Window, shuffle, fold */
@@ -167,123 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    /* Pre-rotation */
    {
       kiss_fft_scalar * OPUS_RESTRICT yp = f;
-      const kiss_twiddle_scalar *t = &l->trig[0];
+      const kiss_twiddle_scalar *t = &trig[0];
       for(i=0;i<N4;i++)
       {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
          kiss_fft_scalar re, im, yr, yi;
-         re = yp[0];
-         im = yp[1];
-         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
-         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
-         /* works because the cos is nearly one */
-         *yp++ = yr + S_MUL(yi,sine);
-         *yp++ = yi - S_MUL(yr,sine);
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+         f2[st->bitrev[i]] = yc;
       }
    }
 
-   /* N/4 complex FFT, down-scales by 4/N */
-   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
+   /* N/4 complex FFT, does not downscale anymore */
+   opus_fft_impl(st, f2);
 
    /* Post-rotate */
    {
       /* Temp pointers to make it really clear to the compiler what we're doing */
-      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
+      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
       kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
       kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
-      const kiss_twiddle_scalar *t = &l->trig[0];
+      const kiss_twiddle_scalar *t = &trig[0];
       /* Temp pointers to make it really clear to the compiler what we're doing */
       for(i=0;i<N4;i++)
       {
          kiss_fft_scalar yr, yi;
-         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
-         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
-         /* works because the cos is nearly one */
-         *yp1 = yr - S_MUL(yi,sine);
-         *yp2 = yi + S_MUL(yr,sine);;
-         fp += 2;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
          yp1 += 2*stride;
          yp2 -= 2*stride;
       }
    }
    RESTORE_STACK;
 }
+#endif /* OVERRIDE_clt_mdct_forward */
 
-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+#ifndef OVERRIDE_clt_mdct_backward
+void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
 {
    int i;
    int N, N2, N4;
-   kiss_twiddle_scalar sine;
-   VARDECL(kiss_fft_scalar, f2);
-   SAVE_STACK;
+   const kiss_twiddle_scalar *trig;
+   (void) arch;
+
    N = l->n;
-   N >>= shift;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
    N2 = N>>1;
    N4 = N>>2;
-   ALLOC(f2, N2, kiss_fft_scalar);
-   /* sin(x) ~= x here */
-#ifdef FIXED_POINT
-   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
-#else
-   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
-#endif
 
    /* Pre-rotate */
    {
       /* Temp pointers to make it really clear to the compiler what we're doing */
       const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
       const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
-      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
-      const kiss_twiddle_scalar *t = &l->trig[0];
+      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
       for(i=0;i<N4;i++)
       {
+         int rev;
          kiss_fft_scalar yr, yi;
-         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
-         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
-         /* works because the cos is nearly one */
-         *yp++ = yr - S_MUL(yi,sine);
-         *yp++ = yi + S_MUL(yr,sine);
+         rev = *bitrev++;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         /* We swap real and imag because we use an FFT instead of an IFFT. */
+         yp[2*rev+1] = yr;
+         yp[2*rev] = yi;
+         /* Storing the pre-rotation directly in the bitrev order. */
          xp1+=2*stride;
          xp2-=2*stride;
       }
    }
 
-   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
-   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
 
    /* Post-rotate and de-shuffle from both ends of the buffer at once to make
       it in-place. */
    {
-      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
-      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
-      const kiss_twiddle_scalar *t = &l->trig[0];
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
       /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
          middle pair will be computed twice. */
       for(i=0;i<(N4+1)>>1;i++)
       {
          kiss_fft_scalar re, im, yr, yi;
          kiss_twiddle_scalar t0, t1;
-         re = yp0[0];
-         im = yp0[1];
-         t0 = t[i<<shift];
-         t1 = t[(N4-i)<<shift];
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp0[1];
+         im = yp0[0];
+         t0 = t[i];
+         t1 = t[N4+i];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL(re,t0) - S_MUL(im,t1);
-         yi = S_MUL(im,t0) + S_MUL(re,t1);
-         re = yp1[0];
-         im = yp1[1];
-         /* works because the cos is nearly one */
-         yp0[0] = -(yr - S_MUL(yi,sine));
-         yp1[1] = yi + S_MUL(yr,sine);
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp1[1];
+         im = yp1[0];
+         yp0[0] = yr;
+         yp1[1] = yi;
 
-         t0 = t[(N4-i-1)<<shift];
-         t1 = t[(i+1)<<shift];
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL(re,t0) - S_MUL(im,t1);
-         yi = S_MUL(im,t0) + S_MUL(re,t1);
-         /* works because the cos is nearly one */
-         yp1[0] = -(yr - S_MUL(yi,sine));
-         yp0[1] = yi + S_MUL(yr,sine);
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
          yp0 += 2;
          yp1 -= 2;
       }
@@ -307,5 +339,5 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
          wp2--;
       }
    }
-   RESTORE_STACK;
 }
+#endif /* OVERRIDE_clt_mdct_backward */
diff --git a/media/libopus/celt/mdct.h b/media/libopus/celt/mdct.h
index d72182138a..160ae4e0f3 100644
--- a/media/libopus/celt/mdct.h
+++ b/media/libopus/celt/mdct.h
@@ -53,18 +53,60 @@ typedef struct {
    const kiss_twiddle_scalar * OPUS_RESTRICT trig;
 } mdct_lookup;
 
-int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
-void clt_mdct_clear(mdct_lookup *l);
+#if defined(HAVE_ARM_NE10)
+#include "arm/mdct_arm.h"
+#endif
+
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
+void clt_mdct_clear(mdct_lookup *l, int arch);
 
 /** Compute a forward MDCT and scale by 4/N, trashes the input array */
-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
-      kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 *window, int overlap, int shift, int stride);
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
+                        kiss_fft_scalar * OPUS_RESTRICT out,
+                        const opus_val16 *window, int overlap,
+                        int shift, int stride, int arch);
 
 /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
     (scales implicitly by 1/2) */
-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
+void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
       kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+      const opus_val16 * OPUS_RESTRICT window,
+      int overlap, int shift, int stride, int arch);
+
+#if !defined(OVERRIDE_OPUS_MDCT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
+
+extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
+      const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+      int overlap, int shift, int stride, int arch);
+
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   ((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+                                                   _window, _overlap, _shift, \
+                                                   _stride, _arch))
+
+extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
+      const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+      int overlap, int shift, int stride, int arch);
+
+#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   (*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+                                                   _window, _overlap, _shift, \
+                                                   _stride, _arch)
+
+#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
+
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
+#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
 
 #endif
diff --git a/media/libopus/celt/mips/celt_mipsr1.h b/media/libopus/celt/mips/celt_mipsr1.h
new file mode 100644
index 0000000000..e85661a661
--- /dev/null
+++ b/media/libopus/celt/mips/celt_mipsr1.h
@@ -0,0 +1,151 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CELT_MIPSR1_H__
+#define __CELT_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#define OVERRIDE_comb_filter
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap, int arch)
+{
+   int i;
+   opus_val32 x0, x1, x2, x3, x4;
+
+   (void)arch;
+
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   opus_val16 g00, g01, g02, g10, g11, g12;
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+
+   g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_P15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   /* If the filter didn't change, we don't need the overlap */
+   if (g0==g1 && T0==T1 && tapset0==tapset1)
+      overlap=0;
+
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      opus_val32 res;
+      f = MULT16_16_Q15(window[i],window[i]);
+      x0= x[i-T1+2];
+
+      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
+
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
+
+      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+
+      y[i] = x[i] + res;
+
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+
+   x4 = x[i-T1-2];
+   x3 = x[i-T1-1];
+   x2 = x[i-T1];
+   x1 = x[i-T1+1];
+
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+
+   for (i=overlap;i<N;i++)
+   {
+      opus_val32 res;
+      x0=x[i-T1+2];
+
+      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
+
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
+      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+      y[i] = x[i] + res;
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+}
+
+#endif /* __CELT_MIPSR1_H__ */
diff --git a/media/libopus/celt/mips/fixed_generic_mipsr1.h b/media/libopus/celt/mips/fixed_generic_mipsr1.h
new file mode 100644
index 0000000000..4a05efbf85
--- /dev/null
+++ b/media/libopus/celt/mips/fixed_generic_mipsr1.h
@@ -0,0 +1,126 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO */
+/**
+   @file fixed_generic.h
+   @brief Generic fixed-point operations
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_FIXED_GENERIC_MIPSR1_H
+#define CELT_FIXED_GENERIC_MIPSR1_H
+
+#undef MULT16_32_Q15_ADD
+static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_32_Q15_SUB
+static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_16_Q15_ADD
+static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_16_Q15_SUB
+static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+
+#undef MULT16_32_Q16
+static inline int MULT16_32_Q16(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
+    return c;
+}
+
+#undef MULT16_32_P16
+static inline int MULT16_32_P16(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
+    return c;
+}
+
+#undef MULT16_32_Q15
+static inline int MULT16_32_Q15(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
+    return c;
+}
+
+#undef MULT32_32_Q31
+static inline int MULT32_32_Q31(int a, int b)
+{
+    int r;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
+    return r;
+}
+
+#undef PSHR32
+static inline int PSHR32(int a, int shift)
+{
+    int r;
+    asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
+    return r;
+}
+
+#undef MULT16_16_P15
+static inline int MULT16_16_P15(int a, int b)
+{
+    int r;
+    asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
+    asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r):  "0" (r), "i"(15));
+    return r;
+}
+
+#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
diff --git a/media/libopus/celt/mips/kiss_fft_mipsr1.h b/media/libopus/celt/mips/kiss_fft_mipsr1.h
new file mode 100644
index 0000000000..400ca4de9c
--- /dev/null
+++ b/media/libopus/celt/mips/kiss_fft_mipsr1.h
@@ -0,0 +1,167 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_MIPSR1_H
+#define KISS_FFT_MIPSR1_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
+#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
+
+#undef S_MUL_ADD
+static inline int S_MUL_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef S_MUL_SUB
+static inline int S_MUL_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef C_MUL
+#   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
+static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+    kiss_fft_cpx m;
+
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+    return m;
+}
+#undef C_MULC
+#   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
+static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+    kiss_fft_cpx m;
+
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+    return m;
+}
+
+#endif /* FIXED_POINT */
+
+#define OVERRIDE_kf_bfly5
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+#ifdef FIXED_POINT
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
+
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+         scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
+
+         scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
+         scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
+
+         scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
+         scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+
+#endif /* KISS_FFT_MIPSR1_H */
diff --git a/media/libopus/celt/mips/mdct_mipsr1.h b/media/libopus/celt/mips/mdct_mipsr1.h
new file mode 100644
index 0000000000..2934dab776
--- /dev/null
+++ b/media/libopus/celt/mips/mdct_mipsr1.h
@@ -0,0 +1,288 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+#ifndef __MDCT_MIPSR1_H__
+#define __MDCT_MIPSR1_H__
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* Forward MDCT trashes the input array */
+#define OVERRIDE_clt_mdct_forward
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+
+    (void)arch;
+
+   SAVE_STACK;
+   scale = st->scale;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+          *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
+          *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+          *yp++ =  S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
+          *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+
+         yr = S_MUL_SUB(re,t0,im,t1);
+         yi = S_MUL_ADD(im,t0,re,t1);
+
+         yc.r = yr;
+         yc.i = yi;
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+         f2[st->bitrev[i]] = yc;
+      }
+   }
+
+   /* N/4 complex FFT, does not downscale anymore */
+   opus_fft_impl(st, f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
+         yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+#define OVERRIDE_clt_mdct_backward
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   const kiss_twiddle_scalar *trig;
+
+    (void)arch;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
+      for(i=0;i<N4;i++)
+      {
+         int rev;
+         kiss_fft_scalar yr, yi;
+         rev = *bitrev++;
+         yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
+         yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
+         /* We swap real and imag because we use an FFT instead of an IFFT. */
+         yp[2*rev+1] = yr;
+         yp[2*rev] = yi;
+         /* Storing the pre-rotation directly in the bitrev order. */
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp0[1];
+         im = yp0[0];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0 , im,t1);
+         yi = S_MUL_SUB(re,t1 , im,t0);
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp1[1];
+         im = yp1[0];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0,im,t1);
+         yi = S_MUL_SUB(re,t1,im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+}
+#endif /* __MDCT_MIPSR1_H__ */
diff --git a/media/libopus/celt/mips/pitch_mipsr1.h b/media/libopus/celt/mips/pitch_mipsr1.h
new file mode 100644
index 0000000000..a9500aff58
--- /dev/null
+++ b/media/libopus/celt/mips/pitch_mipsr1.h
@@ -0,0 +1,161 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_MIPSR1_H
+#define PITCH_MIPSR1_H
+
+#define OVERRIDE_DUAL_INNER_PROD
+static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
+{
+   int j;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+
+   (void)arch;
+
+   asm volatile("MULT $ac1, $0, $0");
+   asm volatile("MULT $ac2, $0, $0");
+   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+   for (j=0;j<N;j++)
+   {
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+      ++j;
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+   }
+   asm volatile ("mflo %0, $ac1": "=r"(xy01));
+   asm volatile ("mflo %0, $ac2": "=r"(xy02));
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+
+static inline void xcorr_kernel_mips(const opus_val16 * x,
+      const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+
+    opus_int64 sum_0, sum_1, sum_2, sum_3;
+    sum_0 =  (opus_int64)sum[0];
+    sum_1 =  (opus_int64)sum[1];
+    sum_2 =  (opus_int64)sum[2];
+    sum_3 =  (opus_int64)sum[3];
+
+    y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+    y_0=*y++;
+    y_1=*y++;
+    y_2=*y++;
+    for (j=0;j<len-3;j+=4)
+    {
+        opus_val16 tmp;
+        tmp = *x++;
+        y_3=*y++;
+
+        sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
+        sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
+        sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+        sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+
+        tmp=*x++;
+        y_0=*y++;
+
+        sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+        sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+        sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+        sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+
+       tmp=*x++;
+       y_1=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+
+      tmp=*x++;
+      y_2=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
+
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+   }
+
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+
+      sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+      sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+      sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+      sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+   }
+
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+   }
+
+   sum[0] = (opus_val32)sum_0;
+   sum[1] = (opus_val32)sum_1;
+   sum[2] = (opus_val32)sum_2;
+   sum[3] = (opus_val32)sum_3;
+}
+
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
+
+#endif /* PITCH_MIPSR1_H */
diff --git a/media/libopus/celt/mips/vq_mipsr1.h b/media/libopus/celt/mips/vq_mipsr1.h
new file mode 100644
index 0000000000..54cef86133
--- /dev/null
+++ b/media/libopus/celt/mips/vq_mipsr1.h
@@ -0,0 +1,125 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __VQ_MIPSR1_H__
+#define __VQ_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "arch.h"
+
+static unsigned extract_collapse_mask(int *iy, int N, int B);
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
+static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
+
+#define OVERRIDE_vq_exp_rotation1
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   opus_val16 ms;
+   celt_norm *Xptr;
+   Xptr = X;
+   ms = NEG16(s);
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+}
+
+#define OVERRIDE_renormalise_vector
+
+#define renormalise_vector(X, N, gain, arch) \
+ (renormalise_vector_mips(X, N, gain, arch))
+
+void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 E = EPSILON;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   int X0, X1;
+
+   (void)arch;
+
+   asm volatile("mult $ac1, $0, $0");
+   asm volatile("MTLO %0, $ac1" : :"r" (E));
+   /*if(N %4)
+       printf("error");*/
+   for (i=0;i<N-2;i+=2)
+   {
+      X0 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+
+      X1 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
+   }
+
+   for (;i<N;i++)
+   {
+      X0 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+   }
+
+   asm volatile("MFLO %0, $ac1" : "=r" (E));
+#ifdef FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+
+#endif /* __VQ_MIPSR1_H__ */
diff --git a/media/libopus/celt/modes.c b/media/libopus/celt/modes.c
index 42e68e1cb7..911686e905 100644
--- a/media/libopus/celt/modes.c
+++ b/media/libopus/celt/modes.c
@@ -37,6 +37,7 @@
 #include "os_support.h"
 #include "stack_alloc.h"
 #include "quant_bands.h"
+#include "cpu_support.h"
 
 static const opus_int16 eband5ms[] = {
 /*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
@@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
    opus_val16 *window;
    opus_int16 *logN;
    int LM;
+   int arch = opus_select_arch();
    ALLOC_STACK;
 #if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
    if (global_stack==NULL)
@@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
    compute_pulse_cache(mode, mode->maxLM);
 
    if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
-           mode->maxLM) == 0)
+           mode->maxLM, arch) == 0)
       goto failure;
 
    if (error)
@@ -408,6 +410,8 @@ failure:
 #ifdef CUSTOM_MODES
 void opus_custom_mode_destroy(CELTMode *mode)
 {
+   int arch = opus_select_arch();
+
    if (mode == NULL)
       return;
 #ifndef CUSTOM_MODES_ONLY
@@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
    opus_free((opus_int16*)mode->cache.index);
    opus_free((unsigned char*)mode->cache.bits);
    opus_free((unsigned char*)mode->cache.caps);
-   clt_mdct_clear(&mode->mdct);
+   clt_mdct_clear(&mode->mdct, arch);
 
    opus_free((CELTMode *)mode);
 }
diff --git a/media/libopus/celt/modes.h b/media/libopus/celt/modes.h
index c8340f9875..be813ccc8b 100644
--- a/media/libopus/celt/modes.h
+++ b/media/libopus/celt/modes.h
@@ -39,14 +39,6 @@
 
 #define MAX_PERIOD 1024
 
-#ifndef OVERLAP
-#define OVERLAP(mode) ((mode)->overlap)
-#endif
-
-#ifndef FRAMESIZE
-#define FRAMESIZE(mode) ((mode)->mdctSize)
-#endif
-
 typedef struct {
    int size;
    const opus_int16 *index;
diff --git a/media/libopus/celt/os_support.h b/media/libopus/celt/os_support.h
index 5e47e3cff9..a2171971e9 100644
--- a/media/libopus/celt/os_support.h
+++ b/media/libopus/celt/os_support.h
@@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr)
 }
 #endif
 
-/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking  */
+/** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
 #ifndef OVERRIDE_OPUS_COPY
 #define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
 #endif
 
-/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
+/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
     provides compile-time type checking */
 #ifndef OVERRIDE_OPUS_MOVE
 #define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
 #endif
 
-/** Set n elements of dst to zero, starting at address s */
+/** Set n elements of dst to zero */
 #ifndef OVERRIDE_OPUS_CLEAR
 #define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
 #endif
diff --git a/media/libopus/celt/pitch.c b/media/libopus/celt/pitch.c
index d2b305441d..1d89cb0342 100644
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
    celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
 }
 
-#if 0 /* This is a simple version of the pitch correlation that should work
-         well on DSPs like Blackfin and TI C5x/C6x */
-
+/* Pure C implementation. */
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+#if defined(OVERRIDE_PITCH_XCORR)
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch)
+#else
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch)
+#endif
 {
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
    int i, j;
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
+#endif
+#if !defined(OVERRIDE_PITCH_XCORR)
+   (void)arch;
 #endif
    for (i=0;i<max_pitch;i++)
    {
       opus_val32 sum = 0;
       for (j=0;j<len;j++)
-         sum = MAC16_16(sum, x[j],y[i+j]);
+         sum = MAC16_16(sum, _x[j], _y[i+j]);
       xcorr[i] = sum;
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -241,30 +251,25 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
 #ifdef FIXED_POINT
    return maxcorr;
 #endif
-}
 
 #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
-
-#ifdef FIXED_POINT
-opus_val32
-#else
-void
-#endif
-celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
-{
-   int i,j;
+   int i;
    /*The EDSP version requires that max_pitch is at least 1, and that _x is
       32-bit aligned.
      Since it's hard to put asserts in assembly, put them here.*/
-   celt_assert(max_pitch>0);
-   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
 #endif
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
-      xcorr_kernel(_x, _y+i, sum, len);
+#if defined(OVERRIDE_PITCH_XCORR)
+      xcorr_kernel_c(_x, _y+i, sum, len);
+#else
+      xcorr_kernel(_x, _y+i, sum, len, arch);
+#endif
       xcorr[i]=sum[0];
       xcorr[i+1]=sum[1];
       xcorr[i+2]=sum[2];
@@ -279,9 +284,12 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
    /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
    for (;i<max_pitch;i++)
    {
-      opus_val32 sum = 0;
-      for (j=0;j<len;j++)
-         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      opus_val32 sum;
+#if defined(OVERRIDE_PITCH_XCORR)
+      sum = celt_inner_prod_c(_x, _y+i, len);
+#else
+      sum = celt_inner_prod(_x, _y+i, len, arch);
+#endif
       xcorr[i] = sum;
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
 #ifdef FIXED_POINT
    return maxcorr;
 #endif
+#endif
 }
 
-#endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                   int len, int max_pitch, int *pitch, int arch)
 {
@@ -361,12 +369,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #endif
    for (i=0;i<max_pitch>>1;i++)
    {
-      opus_val32 sum=0;
+      opus_val32 sum;
       xcorr[i] = 0;
       if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
          continue;
+#ifdef FIXED_POINT
+      sum = 0;
       for (j=0;j<len>>1;j++)
          sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+#else
+      sum = celt_inner_prod_c(x_lp, y+i, len>>1);
+#endif
       xcorr[i] = MAX32(-1, sum);
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -401,7 +414,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 
 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
-      int N, int *T0_, int prev_period, opus_val16 prev_gain)
+      int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
 {
    int k, i, T, T0;
    opus_val16 g, g0;
@@ -426,7 +439,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
 
    T = T0 = *T0_;
    ALLOC(yy_lookup, maxperiod+1, opus_val32);
-   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
    yy_lookup[0] = xx;
    yy=xx;
    for (i=1;i<=maxperiod;i++)
@@ -456,7 +469,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       opus_val16 g1;
       opus_val16 cont=0;
       opus_val16 thresh;
-      T1 = (2*T0+k)/(2*k);
+      T1 = celt_udiv(2*T0+k, 2*k);
       if (T1 < minperiod)
          break;
       /* Look for another strong correlation at T1b */
@@ -468,9 +481,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
             T1b = T0+T1;
       } else
       {
-         T1b = (2*second_check[k]*T0+k)/(2*k);
+         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
       }
-      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
       xy += xy2;
       yy = yy_lookup[T1] + yy_lookup[T1b];
 #ifdef FIXED_POINT
@@ -513,13 +526,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       pg = SHR32(frac_div32(best_xy,best_yy+1),16);
 
    for (k=0;k<3;k++)
-   {
-      int T1 = T+k-1;
-      xy = 0;
-      for (i=0;i<N;i++)
-         xy = MAC16_16(xy, x[i], x[i-T1]);
-      xcorr[k] = xy;
-   }
+      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
    if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
       offset = 1;
    else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
diff --git a/media/libopus/celt/pitch.h b/media/libopus/celt/pitch.h
index df317ecc1d..65a77a6ecc 100644
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@@ -37,11 +37,17 @@
 #include "modes.h"
 #include "cpu_support.h"
 
-#if defined(__SSE__) && !defined(FIXED_POINT)
+#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \
+  || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT))
 #include "x86/pitch_sse.h"
 #endif
 
-#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
+#if defined(MIPSr1_ASM)
+#include "mips/pitch_mipsr1.h"
+#endif
+
+#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
+  || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 # include "arm/pitch_arm.h"
 #endif
 
@@ -52,12 +58,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
                   int len, int max_pitch, int *pitch, int arch);
 
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
-      int N, int *T0, int prev_period, opus_val16 prev_gain);
+      int N, int *T0, int prev_period, opus_val16 prev_gain, int arch);
+
 
 /* OPT: This is the kernel you really want to optimize. It gets used a lot
    by the prefilter and by the PLC. */
-#ifndef OVERRIDE_XCORR_KERNEL
-static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
 {
    int j;
    opus_val16 y_0, y_1, y_2, y_3;
@@ -122,10 +128,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y,
       sum[3] = MAC16_16(sum[3],tmp,y_1);
    }
 }
+
+#ifndef OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)(arch),xcorr_kernel_c(x, y, sum, len))
 #endif /* OVERRIDE_XCORR_KERNEL */
 
-#ifndef OVERRIDE_DUAL_INNER_PROD
-static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+
+static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
       int N, opus_val32 *xy1, opus_val32 *xy2)
 {
    int i;
@@ -139,8 +149,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
    *xy1 = xy01;
    *xy2 = xy02;
 }
+
+#ifndef OVERRIDE_DUAL_INNER_PROD
+# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
+    ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2))
 #endif
 
+/*We make sure a C version is always available for cases where the overhead of
+  vectorization and passing around an arch flag aren't worth it.*/
+static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x,
+      const opus_val16 *y, int N)
+{
+   int i;
+   opus_val32 xy=0;
+   for (i=0;i<N;i++)
+      xy = MAC16_16(xy, x[i], y[i]);
+   return xy;
+}
+
+#if !defined(OVERRIDE_CELT_INNER_PROD)
+# define celt_inner_prod(x, y, N, arch) \
+    ((void)(arch),celt_inner_prod_c(x, y, N))
+#endif
+
+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+     opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+
 #ifdef FIXED_POINT
 opus_val32
 #else
@@ -151,7 +188,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
 
 #if !defined(OVERRIDE_PITCH_XCORR)
 /*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD)
+# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
+   || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
+   && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
 extern
 #  if defined(FIXED_POINT)
 opus_val32
@@ -161,12 +200,20 @@ void
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
       const opus_val16 *, opus_val32 *, int, int);
 
+#  define OVERRIDE_PITCH_XCORR
 #  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
         xcorr, len, max_pitch))
 # else
-#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch);
+
 # endif
 #endif
 
diff --git a/media/libopus/celt/quant_bands.c b/media/libopus/celt/quant_bands.c
index ac6952c266..95076e0af2 100644
--- a/media/libopus/celt/quant_bands.c
+++ b/media/libopus/celt/quant_bands.c
@@ -292,7 +292,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
 #endif
    }
    if (lfe)
-      max_decay=3;
+      max_decay = QCONST16(3.f,DB_SHIFT);
    enc_start_state = *enc;
 
    ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
diff --git a/media/libopus/celt/rate.c b/media/libopus/celt/rate.c
index 1055e63d60..4192120ce8 100644
--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@@ -131,7 +131,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
    for (i=0;i<nbEntries;i++)
    {
       unsigned char *ptr = bits+entryI[i];
-      opus_int16 tmp[MAX_PULSES+1];
+      opus_int16 tmp[CELT_MAX_PULSES+1];
       get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
       for (j=1;j<=entryK[i];j++)
          ptr[j] = tmp[get_pulses(j)]-1;
@@ -333,7 +333,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
       /*Figure out how many left-over bits we would be adding to this band.
         This can include bits we've stolen back from higher, skipped bands.*/
       left = total-psum;
-      percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+      percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
       left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
       rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
       band_width = m->eBands[codedBands]-m->eBands[j];
@@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
 
    /* Allocate the remaining bits */
    left = total-psum;
-   percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+   percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
    left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
    for (j=start;j<codedBands;j++)
       bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
@@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
             offset += NClogN>>3;
 
          /* Divide with rounding */
-         ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
+         ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))));
+         ebits[j] = celt_udiv(ebits[j], den)>>BITRES;
 
          /* Make sure not to bust */
          if (C*ebits[j] > (bits[j]>>BITRES))
diff --git a/media/libopus/celt/rate.h b/media/libopus/celt/rate.h
index f1e0661129..515f7687ce 100644
--- a/media/libopus/celt/rate.h
+++ b/media/libopus/celt/rate.h
@@ -32,7 +32,7 @@
 #define MAX_PSEUDO 40
 #define LOG_MAX_PSEUDO 6
 
-#define MAX_PULSES 128
+#define CELT_MAX_PULSES 128
 
 #define MAX_FINE_BITS 8
 
diff --git a/media/libopus/celt/stack_alloc.h b/media/libopus/celt/stack_alloc.h
index 316a6ce12c..2b51c8d80c 100644
--- a/media/libopus/celt/stack_alloc.h
+++ b/media/libopus/celt/stack_alloc.h
@@ -116,9 +116,11 @@
 #else
 
 #ifdef CELT_C
+char *scratch_ptr=0;
 char *global_stack=0;
 #else
 extern char *global_stack;
+extern char *scratch_ptr;
 #endif /* CELT_C */
 
 #ifdef ENABLE_VALGRIND
@@ -140,8 +142,12 @@ extern char *global_stack_top;
 
 #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
 #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#if 0 /* Set this to 1 to instrument pseudostack usage */
+#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
+#else
 #define RESTORE_STACK (global_stack = _saved_stack)
-#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
+#endif
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack;
 
 #endif /* ENABLE_VALGRIND */
 
diff --git a/media/libopus/celt/static_modes_fixed.h b/media/libopus/celt/static_modes_fixed.h
index 216df9e605..8717d626cb 100644
--- a/media/libopus/celt/static_modes_fixed.h
+++ b/media/libopus/celt/static_modes_fixed.h
@@ -4,6 +4,11 @@
 #include "modes.h"
 #include "rate.h"
 
+#ifdef HAVE_ARM_NE10
+#define OVERRIDE_FFT 1
+#include "static_modes_fixed_arm_ne10.h"
+#endif
+
 #ifndef DEF_WINDOW120
 #define DEF_WINDOW120
 static const opus_val16 window120[120] = {
@@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
 #ifndef FFT_BITREV480
 #define FFT_BITREV480
 static const opus_int16 fft_bitrev480[480] = {
-0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
-450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
-345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
-215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
-110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
-430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
-325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
-181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
-76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
-396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
-291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
-161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
-56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
-362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
-257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
-127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
-22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
-472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
-342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
-237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
-93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
-438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
-308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
-203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
-73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
-418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
-274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
-169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
-39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
-384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
-254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
-149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
+8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
+16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
+24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
+4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
+12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
+20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
+28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
+1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
+9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
+17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
+25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
+5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
+13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
+21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
+29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
+2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
+10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
+18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
+26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
+6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
+14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
+22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
+30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
+3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
+11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
+19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
+27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
+7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
+15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
+23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
+31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
 };
 #endif
 
 #ifndef FFT_BITREV240
 #define FFT_BITREV240
 static const opus_int16 fft_bitrev240[240] = {
-0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
-225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
-170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
-115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
-46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
-216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
-161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
-92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
-37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
-207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
-138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
-83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
-28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
-184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
-129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
-74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
+4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
+8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
+12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
+1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
+5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
+9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
+13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
+2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
+6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
+10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
+14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
+3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
+7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
+11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
+15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
 };
 #endif
 
 #ifndef FFT_BITREV120
 #define FFT_BITREV120
 static const opus_int16 fft_bitrev120[120] = {
-0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
-110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
-76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
-56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
-22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
-93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
-73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
-39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
+4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
+1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
+5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
+2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
+6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
+3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
+7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
 };
 #endif
 
 #ifndef FFT_BITREV60
 #define FFT_BITREV60
 static const opus_int16 fft_bitrev60[60] = {
-0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
-46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
-37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
-28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
+1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
+2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
+3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
 };
 #endif
 
@@ -426,10 +431,17 @@ static const opus_int16 fft_bitrev60[60] = {
 #define FFT_STATE48000_960_0
 static const kiss_fft_state fft_state48000_960_0 = {
 480,    /* nfft */
+17476,    /* scale */
+8,      /* scale_shift */
 -1,     /* shift */
-{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev480,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_480,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -437,10 +449,17 @@ fft_twiddles48000_960,  /* bitrev */
 #define FFT_STATE48000_960_1
 static const kiss_fft_state fft_state48000_960_1 = {
 240,    /* nfft */
+17476,    /* scale */
+7,      /* scale_shift */
 1,      /* shift */
-{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev240,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_240,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -448,10 +467,17 @@ fft_twiddles48000_960,  /* bitrev */
 #define FFT_STATE48000_960_2
 static const kiss_fft_state fft_state48000_960_2 = {
 120,    /* nfft */
+17476,    /* scale */
+6,      /* scale_shift */
 2,      /* shift */
-{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev120,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_120,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -459,10 +485,17 @@ fft_twiddles48000_960,  /* bitrev */
 #define FFT_STATE48000_960_3
 static const kiss_fft_state fft_state48000_960_3 = {
 60,     /* nfft */
+17476,    /* scale */
+5,      /* scale_shift */
 3,      /* shift */
-{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev60,   /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_60,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -470,104 +503,368 @@ fft_twiddles48000_960,  /* bitrev */
 
 #ifndef MDCT_TWIDDLES960
 #define MDCT_TWIDDLES960
-static const opus_val16 mdct_twiddles960[481] = {
-32767, 32767, 32767, 32767, 32766,
-32763, 32762, 32759, 32757, 32753,
-32751, 32747, 32743, 32738, 32733,
-32729, 32724, 32717, 32711, 32705,
-32698, 32690, 32683, 32676, 32667,
-32658, 32650, 32640, 32631, 32620,
-32610, 32599, 32588, 32577, 32566,
-32554, 32541, 32528, 32515, 32502,
-32487, 32474, 32459, 32444, 32429,
-32413, 32397, 32381, 32364, 32348,
-32331, 32313, 32294, 32277, 32257,
-32239, 32219, 32200, 32180, 32159,
-32138, 32118, 32096, 32074, 32051,
-32029, 32006, 31984, 31960, 31936,
-31912, 31888, 31863, 31837, 31812,
-31786, 31760, 31734, 31707, 31679,
-31652, 31624, 31596, 31567, 31539,
-31508, 31479, 31450, 31419, 31388,
-31357, 31326, 31294, 31262, 31230,
-31198, 31164, 31131, 31097, 31063,
-31030, 30994, 30959, 30924, 30889,
-30853, 30816, 30779, 30743, 30705,
-30668, 30629, 30592, 30553, 30515,
-30475, 30435, 30396, 30356, 30315,
-30274, 30233, 30191, 30149, 30107,
-30065, 30022, 29979, 29936, 29891,
-29847, 29803, 29758, 29713, 29668,
-29622, 29577, 29529, 29483, 29436,
-29390, 29341, 29293, 29246, 29197,
-29148, 29098, 29050, 29000, 28949,
-28899, 28848, 28797, 28746, 28694,
-28642, 28590, 28537, 28485, 28432,
-28378, 28324, 28271, 28217, 28162,
-28106, 28051, 27995, 27940, 27884,
-27827, 27770, 27713, 27657, 27598,
-27540, 27481, 27423, 27365, 27305,
-27246, 27187, 27126, 27066, 27006,
-26945, 26883, 26822, 26760, 26698,
-26636, 26574, 26510, 26448, 26383,
-26320, 26257, 26191, 26127, 26062,
-25997, 25931, 25866, 25800, 25734,
-25667, 25601, 25533, 25466, 25398,
-25330, 25262, 25194, 25125, 25056,
-24987, 24917, 24848, 24778, 24707,
-24636, 24566, 24495, 24424, 24352,
-24280, 24208, 24135, 24063, 23990,
-23917, 23842, 23769, 23695, 23622,
-23546, 23472, 23398, 23322, 23246,
-23171, 23095, 23018, 22942, 22866,
-22788, 22711, 22634, 22557, 22478,
-22400, 22322, 22244, 22165, 22085,
-22006, 21927, 21846, 21766, 21687,
-21606, 21524, 21443, 21363, 21282,
-21199, 21118, 21035, 20954, 20870,
-20788, 20705, 20621, 20538, 20455,
-20371, 20286, 20202, 20118, 20034,
-19947, 19863, 19777, 19692, 19606,
-19520, 19434, 19347, 19260, 19174,
-19088, 18999, 18911, 18825, 18737,
-18648, 18560, 18472, 18384, 18294,
-18205, 18116, 18025, 17936, 17846,
-17757, 17666, 17576, 17485, 17395,
-17303, 17212, 17122, 17030, 16937,
-16846, 16755, 16662, 16569, 16477,
-16385, 16291, 16198, 16105, 16012,
-15917, 15824, 15730, 15636, 15541,
-15447, 15352, 15257, 15162, 15067,
-14973, 14875, 14781, 14685, 14589,
-14493, 14396, 14300, 14204, 14107,
-14010, 13914, 13815, 13718, 13621,
-13524, 13425, 13328, 13230, 13133,
-13033, 12935, 12836, 12738, 12638,
-12540, 12441, 12341, 12241, 12142,
-12044, 11943, 11843, 11744, 11643,
-11542, 11442, 11342, 11241, 11139,
-11039, 10939, 10836, 10736, 10635,
-10534, 10431, 10330, 10228, 10127,
-10024, 9921, 9820, 9718, 9614,
-9512, 9410, 9306, 9204, 9101,
-8998, 8895, 8791, 8689, 8585,
-8481, 8377, 8274, 8171, 8067,
-7962, 7858, 7753, 7650, 7545,
-7441, 7336, 7231, 7129, 7023,
-6917, 6813, 6709, 6604, 6498,
-6393, 6288, 6182, 6077, 5973,
-5867, 5760, 5656, 5549, 5445,
-5339, 5232, 5127, 5022, 4914,
-4809, 4703, 4596, 4490, 4384,
-4278, 4171, 4065, 3958, 3852,
-3745, 3640, 3532, 3426, 3318,
-3212, 3106, 2998, 2891, 2786,
-2679, 2570, 2465, 2358, 2251,
-2143, 2037, 1929, 1823, 1715,
-1609, 1501, 1393, 1287, 1180,
-1073, 964, 858, 751, 644,
-535, 429, 322, 214, 107,
-0, };
+static const opus_val16 mdct_twiddles960[1800] = {
+32767, 32767, 32767, 32766, 32765,
+32763, 32761, 32759, 32756, 32753,
+32750, 32746, 32742, 32738, 32733,
+32728, 32722, 32717, 32710, 32704,
+32697, 32690, 32682, 32674, 32666,
+32657, 32648, 32639, 32629, 32619,
+32609, 32598, 32587, 32576, 32564,
+32552, 32539, 32526, 32513, 32500,
+32486, 32472, 32457, 32442, 32427,
+32411, 32395, 32379, 32362, 32345,
+32328, 32310, 32292, 32274, 32255,
+32236, 32217, 32197, 32177, 32157,
+32136, 32115, 32093, 32071, 32049,
+32027, 32004, 31981, 31957, 31933,
+31909, 31884, 31859, 31834, 31809,
+31783, 31756, 31730, 31703, 31676,
+31648, 31620, 31592, 31563, 31534,
+31505, 31475, 31445, 31415, 31384,
+31353, 31322, 31290, 31258, 31226,
+31193, 31160, 31127, 31093, 31059,
+31025, 30990, 30955, 30920, 30884,
+30848, 30812, 30775, 30738, 30701,
+30663, 30625, 30587, 30548, 30509,
+30470, 30430, 30390, 30350, 30309,
+30269, 30227, 30186, 30144, 30102,
+30059, 30016, 29973, 29930, 29886,
+29842, 29797, 29752, 29707, 29662,
+29616, 29570, 29524, 29477, 29430,
+29383, 29335, 29287, 29239, 29190,
+29142, 29092, 29043, 28993, 28943,
+28892, 28842, 28791, 28739, 28688,
+28636, 28583, 28531, 28478, 28425,
+28371, 28317, 28263, 28209, 28154,
+28099, 28044, 27988, 27932, 27876,
+27820, 27763, 27706, 27648, 27591,
+27533, 27474, 27416, 27357, 27298,
+27238, 27178, 27118, 27058, 26997,
+26936, 26875, 26814, 26752, 26690,
+26628, 26565, 26502, 26439, 26375,
+26312, 26247, 26183, 26119, 26054,
+25988, 25923, 25857, 25791, 25725,
+25658, 25592, 25524, 25457, 25389,
+25322, 25253, 25185, 25116, 25047,
+24978, 24908, 24838, 24768, 24698,
+24627, 24557, 24485, 24414, 24342,
+24270, 24198, 24126, 24053, 23980,
+23907, 23834, 23760, 23686, 23612,
+23537, 23462, 23387, 23312, 23237,
+23161, 23085, 23009, 22932, 22856,
+22779, 22701, 22624, 22546, 22468,
+22390, 22312, 22233, 22154, 22075,
+21996, 21916, 21836, 21756, 21676,
+21595, 21515, 21434, 21352, 21271,
+21189, 21107, 21025, 20943, 20860,
+20777, 20694, 20611, 20528, 20444,
+20360, 20276, 20192, 20107, 20022,
+19937, 19852, 19767, 19681, 19595,
+19509, 19423, 19336, 19250, 19163,
+19076, 18988, 18901, 18813, 18725,
+18637, 18549, 18460, 18372, 18283,
+18194, 18104, 18015, 17925, 17835,
+17745, 17655, 17565, 17474, 17383,
+17292, 17201, 17110, 17018, 16927,
+16835, 16743, 16650, 16558, 16465,
+16372, 16279, 16186, 16093, 15999,
+15906, 15812, 15718, 15624, 15529,
+15435, 15340, 15245, 15150, 15055,
+14960, 14864, 14769, 14673, 14577,
+14481, 14385, 14288, 14192, 14095,
+13998, 13901, 13804, 13706, 13609,
+13511, 13414, 13316, 13218, 13119,
+13021, 12923, 12824, 12725, 12626,
+12527, 12428, 12329, 12230, 12130,
+12030, 11930, 11831, 11730, 11630,
+11530, 11430, 11329, 11228, 11128,
+11027, 10926, 10824, 10723, 10622,
+10520, 10419, 10317, 10215, 10113,
+10011, 9909, 9807, 9704, 9602,
+9499, 9397, 9294, 9191, 9088,
+8985, 8882, 8778, 8675, 8572,
+8468, 8364, 8261, 8157, 8053,
+7949, 7845, 7741, 7637, 7532,
+7428, 7323, 7219, 7114, 7009,
+6905, 6800, 6695, 6590, 6485,
+6380, 6274, 6169, 6064, 5958,
+5853, 5747, 5642, 5536, 5430,
+5325, 5219, 5113, 5007, 4901,
+4795, 4689, 4583, 4476, 4370,
+4264, 4157, 4051, 3945, 3838,
+3732, 3625, 3518, 3412, 3305,
+3198, 3092, 2985, 2878, 2771,
+2664, 2558, 2451, 2344, 2237,
+2130, 2023, 1916, 1809, 1702,
+1594, 1487, 1380, 1273, 1166,
+1059, 952, 844, 737, 630,
+523, 416, 308, 201, 94,
+-13, -121, -228, -335, -442,
+-550, -657, -764, -871, -978,
+-1086, -1193, -1300, -1407, -1514,
+-1621, -1728, -1835, -1942, -2049,
+-2157, -2263, -2370, -2477, -2584,
+-2691, -2798, -2905, -3012, -3118,
+-3225, -3332, -3439, -3545, -3652,
+-3758, -3865, -3971, -4078, -4184,
+-4290, -4397, -4503, -4609, -4715,
+-4821, -4927, -5033, -5139, -5245,
+-5351, -5457, -5562, -5668, -5774,
+-5879, -5985, -6090, -6195, -6301,
+-6406, -6511, -6616, -6721, -6826,
+-6931, -7036, -7140, -7245, -7349,
+-7454, -7558, -7663, -7767, -7871,
+-7975, -8079, -8183, -8287, -8390,
+-8494, -8597, -8701, -8804, -8907,
+-9011, -9114, -9217, -9319, -9422,
+-9525, -9627, -9730, -9832, -9934,
+-10037, -10139, -10241, -10342, -10444,
+-10546, -10647, -10748, -10850, -10951,
+-11052, -11153, -11253, -11354, -11455,
+-11555, -11655, -11756, -11856, -11955,
+-12055, -12155, -12254, -12354, -12453,
+-12552, -12651, -12750, -12849, -12947,
+-13046, -13144, -13242, -13340, -13438,
+-13536, -13633, -13731, -13828, -13925,
+-14022, -14119, -14216, -14312, -14409,
+-14505, -14601, -14697, -14793, -14888,
+-14984, -15079, -15174, -15269, -15364,
+-15459, -15553, -15647, -15741, -15835,
+-15929, -16023, -16116, -16210, -16303,
+-16396, -16488, -16581, -16673, -16766,
+-16858, -16949, -17041, -17133, -17224,
+-17315, -17406, -17497, -17587, -17678,
+-17768, -17858, -17948, -18037, -18127,
+-18216, -18305, -18394, -18483, -18571,
+-18659, -18747, -18835, -18923, -19010,
+-19098, -19185, -19271, -19358, -19444,
+-19531, -19617, -19702, -19788, -19873,
+-19959, -20043, -20128, -20213, -20297,
+-20381, -20465, -20549, -20632, -20715,
+-20798, -20881, -20963, -21046, -21128,
+-21210, -21291, -21373, -21454, -21535,
+-21616, -21696, -21776, -21856, -21936,
+-22016, -22095, -22174, -22253, -22331,
+-22410, -22488, -22566, -22643, -22721,
+-22798, -22875, -22951, -23028, -23104,
+-23180, -23256, -23331, -23406, -23481,
+-23556, -23630, -23704, -23778, -23852,
+-23925, -23998, -24071, -24144, -24216,
+-24288, -24360, -24432, -24503, -24574,
+-24645, -24716, -24786, -24856, -24926,
+-24995, -25064, -25133, -25202, -25270,
+-25339, -25406, -25474, -25541, -25608,
+-25675, -25742, -25808, -25874, -25939,
+-26005, -26070, -26135, -26199, -26264,
+-26327, -26391, -26455, -26518, -26581,
+-26643, -26705, -26767, -26829, -26891,
+-26952, -27013, -27073, -27133, -27193,
+-27253, -27312, -27372, -27430, -27489,
+-27547, -27605, -27663, -27720, -27777,
+-27834, -27890, -27946, -28002, -28058,
+-28113, -28168, -28223, -28277, -28331,
+-28385, -28438, -28491, -28544, -28596,
+-28649, -28701, -28752, -28803, -28854,
+-28905, -28955, -29006, -29055, -29105,
+-29154, -29203, -29251, -29299, -29347,
+-29395, -29442, -29489, -29535, -29582,
+-29628, -29673, -29719, -29764, -29808,
+-29853, -29897, -29941, -29984, -30027,
+-30070, -30112, -30154, -30196, -30238,
+-30279, -30320, -30360, -30400, -30440,
+-30480, -30519, -30558, -30596, -30635,
+-30672, -30710, -30747, -30784, -30821,
+-30857, -30893, -30929, -30964, -30999,
+-31033, -31068, -31102, -31135, -31168,
+-31201, -31234, -31266, -31298, -31330,
+-31361, -31392, -31422, -31453, -31483,
+-31512, -31541, -31570, -31599, -31627,
+-31655, -31682, -31710, -31737, -31763,
+-31789, -31815, -31841, -31866, -31891,
+-31915, -31939, -31963, -31986, -32010,
+-32032, -32055, -32077, -32099, -32120,
+-32141, -32162, -32182, -32202, -32222,
+-32241, -32260, -32279, -32297, -32315,
+-32333, -32350, -32367, -32383, -32399,
+-32415, -32431, -32446, -32461, -32475,
+-32489, -32503, -32517, -32530, -32542,
+-32555, -32567, -32579, -32590, -32601,
+-32612, -32622, -32632, -32641, -32651,
+-32659, -32668, -32676, -32684, -32692,
+-32699, -32706, -32712, -32718, -32724,
+-32729, -32734, -32739, -32743, -32747,
+-32751, -32754, -32757, -32760, -32762,
+-32764, -32765, -32767, -32767, -32767,
+32767, 32767, 32765, 32761, 32756,
+32750, 32742, 32732, 32722, 32710,
+32696, 32681, 32665, 32647, 32628,
+32608, 32586, 32562, 32538, 32512,
+32484, 32455, 32425, 32393, 32360,
+32326, 32290, 32253, 32214, 32174,
+32133, 32090, 32046, 32001, 31954,
+31906, 31856, 31805, 31753, 31700,
+31645, 31588, 31530, 31471, 31411,
+31349, 31286, 31222, 31156, 31089,
+31020, 30951, 30880, 30807, 30733,
+30658, 30582, 30504, 30425, 30345,
+30263, 30181, 30096, 30011, 29924,
+29836, 29747, 29656, 29564, 29471,
+29377, 29281, 29184, 29086, 28987,
+28886, 28784, 28681, 28577, 28471,
+28365, 28257, 28147, 28037, 27925,
+27812, 27698, 27583, 27467, 27349,
+27231, 27111, 26990, 26868, 26744,
+26620, 26494, 26367, 26239, 26110,
+25980, 25849, 25717, 25583, 25449,
+25313, 25176, 25038, 24900, 24760,
+24619, 24477, 24333, 24189, 24044,
+23898, 23751, 23602, 23453, 23303,
+23152, 22999, 22846, 22692, 22537,
+22380, 22223, 22065, 21906, 21746,
+21585, 21423, 21261, 21097, 20933,
+20767, 20601, 20434, 20265, 20096,
+19927, 19756, 19584, 19412, 19239,
+19065, 18890, 18714, 18538, 18361,
+18183, 18004, 17824, 17644, 17463,
+17281, 17098, 16915, 16731, 16546,
+16361, 16175, 15988, 15800, 15612,
+15423, 15234, 15043, 14852, 14661,
+14469, 14276, 14083, 13889, 13694,
+13499, 13303, 13107, 12910, 12713,
+12515, 12317, 12118, 11918, 11718,
+11517, 11316, 11115, 10913, 10710,
+10508, 10304, 10100, 9896, 9691,
+9486, 9281, 9075, 8869, 8662,
+8455, 8248, 8040, 7832, 7623,
+7415, 7206, 6996, 6787, 6577,
+6366, 6156, 5945, 5734, 5523,
+5311, 5100, 4888, 4675, 4463,
+4251, 4038, 3825, 3612, 3399,
+3185, 2972, 2758, 2544, 2330,
+2116, 1902, 1688, 1474, 1260,
+1045, 831, 617, 402, 188,
+-27, -241, -456, -670, -885,
+-1099, -1313, -1528, -1742, -1956,
+-2170, -2384, -2598, -2811, -3025,
+-3239, -3452, -3665, -3878, -4091,
+-4304, -4516, -4728, -4941, -5153,
+-5364, -5576, -5787, -5998, -6209,
+-6419, -6629, -6839, -7049, -7258,
+-7467, -7676, -7884, -8092, -8300,
+-8507, -8714, -8920, -9127, -9332,
+-9538, -9743, -9947, -10151, -10355,
+-10558, -10761, -10963, -11165, -11367,
+-11568, -11768, -11968, -12167, -12366,
+-12565, -12762, -12960, -13156, -13352,
+-13548, -13743, -13937, -14131, -14324,
+-14517, -14709, -14900, -15091, -15281,
+-15470, -15659, -15847, -16035, -16221,
+-16407, -16593, -16777, -16961, -17144,
+-17326, -17508, -17689, -17869, -18049,
+-18227, -18405, -18582, -18758, -18934,
+-19108, -19282, -19455, -19627, -19799,
+-19969, -20139, -20308, -20475, -20642,
+-20809, -20974, -21138, -21301, -21464,
+-21626, -21786, -21946, -22105, -22263,
+-22420, -22575, -22730, -22884, -23037,
+-23189, -23340, -23490, -23640, -23788,
+-23935, -24080, -24225, -24369, -24512,
+-24654, -24795, -24934, -25073, -25211,
+-25347, -25482, -25617, -25750, -25882,
+-26013, -26143, -26272, -26399, -26526,
+-26651, -26775, -26898, -27020, -27141,
+-27260, -27379, -27496, -27612, -27727,
+-27841, -27953, -28065, -28175, -28284,
+-28391, -28498, -28603, -28707, -28810,
+-28911, -29012, -29111, -29209, -29305,
+-29401, -29495, -29587, -29679, -29769,
+-29858, -29946, -30032, -30118, -30201,
+-30284, -30365, -30445, -30524, -30601,
+-30677, -30752, -30825, -30897, -30968,
+-31038, -31106, -31172, -31238, -31302,
+-31365, -31426, -31486, -31545, -31602,
+-31658, -31713, -31766, -31818, -31869,
+-31918, -31966, -32012, -32058, -32101,
+-32144, -32185, -32224, -32262, -32299,
+-32335, -32369, -32401, -32433, -32463,
+-32491, -32518, -32544, -32568, -32591,
+-32613, -32633, -32652, -32669, -32685,
+-32700, -32713, -32724, -32735, -32744,
+-32751, -32757, -32762, -32766, -32767,
+32767, 32764, 32755, 32741, 32720,
+32694, 32663, 32626, 32583, 32535,
+32481, 32421, 32356, 32286, 32209,
+32128, 32041, 31948, 31850, 31747,
+31638, 31523, 31403, 31278, 31148,
+31012, 30871, 30724, 30572, 30415,
+30253, 30086, 29913, 29736, 29553,
+29365, 29172, 28974, 28771, 28564,
+28351, 28134, 27911, 27684, 27452,
+27216, 26975, 26729, 26478, 26223,
+25964, 25700, 25432, 25159, 24882,
+24601, 24315, 24026, 23732, 23434,
+23133, 22827, 22517, 22204, 21886,
+21565, 21240, 20912, 20580, 20244,
+19905, 19563, 19217, 18868, 18516,
+18160, 17802, 17440, 17075, 16708,
+16338, 15964, 15588, 15210, 14829,
+14445, 14059, 13670, 13279, 12886,
+12490, 12093, 11693, 11291, 10888,
+10482, 10075, 9666, 9255, 8843,
+8429, 8014, 7597, 7180, 6760,
+6340, 5919, 5496, 5073, 4649,
+4224, 3798, 3372, 2945, 2517,
+2090, 1661, 1233, 804, 375,
+-54, -483, -911, -1340, -1768,
+-2197, -2624, -3052, -3479, -3905,
+-4330, -4755, -5179, -5602, -6024,
+-6445, -6865, -7284, -7702, -8118,
+-8533, -8946, -9358, -9768, -10177,
+-10584, -10989, -11392, -11793, -12192,
+-12589, -12984, -13377, -13767, -14155,
+-14541, -14924, -15305, -15683, -16058,
+-16430, -16800, -17167, -17531, -17892,
+-18249, -18604, -18956, -19304, -19649,
+-19990, -20329, -20663, -20994, -21322,
+-21646, -21966, -22282, -22595, -22904,
+-23208, -23509, -23806, -24099, -24387,
+-24672, -24952, -25228, -25499, -25766,
+-26029, -26288, -26541, -26791, -27035,
+-27275, -27511, -27741, -27967, -28188,
+-28405, -28616, -28823, -29024, -29221,
+-29412, -29599, -29780, -29957, -30128,
+-30294, -30455, -30611, -30761, -30906,
+-31046, -31181, -31310, -31434, -31552,
+-31665, -31773, -31875, -31972, -32063,
+-32149, -32229, -32304, -32373, -32437,
+-32495, -32547, -32594, -32635, -32671,
+-32701, -32726, -32745, -32758, -32766,
+32767, 32754, 32717, 32658, 32577,
+32473, 32348, 32200, 32029, 31837,
+31624, 31388, 31131, 30853, 30553,
+30232, 29891, 29530, 29148, 28746,
+28324, 27883, 27423, 26944, 26447,
+25931, 25398, 24847, 24279, 23695,
+23095, 22478, 21846, 21199, 20538,
+19863, 19174, 18472, 17757, 17030,
+16291, 15541, 14781, 14010, 13230,
+12441, 11643, 10837, 10024, 9204,
+8377, 7545, 6708, 5866, 5020,
+4171, 3319, 2464, 1608, 751,
+-107, -965, -1822, -2678, -3532,
+-4383, -5232, -6077, -6918, -7754,
+-8585, -9409, -10228, -11039, -11843,
+-12639, -13426, -14204, -14972, -15730,
+-16477, -17213, -17937, -18648, -19347,
+-20033, -20705, -21363, -22006, -22634,
+-23246, -23843, -24423, -24986, -25533,
+-26062, -26573, -27066, -27540, -27995,
+-28431, -28848, -29245, -29622, -29979,
+-30315, -30630, -30924, -31197, -31449,
+-31679, -31887, -32074, -32239, -32381,
+-32501, -32600, -32675, -32729, -32759,
+};
 #endif
 
 static const CELTMode mode48000_960_120 = {
diff --git a/media/libopus/celt/static_modes_fixed_arm_ne10.h b/media/libopus/celt/static_modes_fixed_arm_ne10.h
new file mode 100644
index 0000000000..b8ef0cee98
--- /dev/null
+++ b/media/libopus/celt/static_modes_fixed_arm_ne10.h
@@ -0,0 +1,388 @@
+/* The contents of this file was automatically generated by
+ * dump_mode_arm_ne10.c with arguments: 48000 960
+ * It contains static definitions for some pre-defined modes. */
+#include <NE10_init.h>
+
+#ifndef NE10_FFT_PARAMS48000_960
+#define NE10_FFT_PARAMS48000_960
+static const ne10_int32_t ne10_factors_480[64] = {
+4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_240[64] = {
+3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_120[64] = {
+3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_60[64] = {
+2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
+{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
+{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
+{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
+{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
+{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
+{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
+{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
+{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
+{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
+{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
+{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
+{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
+{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
+{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
+{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570},
+{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154},
+{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172},
+{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068},
+{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822},
+{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146},
+{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682},
+{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231},
+{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791},
+{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029},
+{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313},
+{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783},
+{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661},
+{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541},
+{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450},
+{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914},
+{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194},
+{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807},
+{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067},
+{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658},
+{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677},
+{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704},
+{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277},
+{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512},
+{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510},
+{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682},
+{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424},
+{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524},
+{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195},
+{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164},
+{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771},
+{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961},
+{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705},
+{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540},
+{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994},
+{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540},
+{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646},
+{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814},
+{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593},
+{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667},
+{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
+{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
+{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
+{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
+{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
+{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
+{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
+{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
+{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
+{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
+{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
+{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
+{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
+{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
+{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
+{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
+{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
+{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
+{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
+{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
+{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584},
+{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622},
+{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955},
+{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651},
+{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703},
+{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114},
+{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330},
+{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403},
+{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078},
+{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372},
+{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738},
+{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248},
+{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400},
+{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552},
+{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141},
+{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553},
+{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377},
+{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448},
+{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240},
+{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574},
+{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630},
+{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288},
+{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960},
+{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934},
+{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097},
+{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189},
+{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282},
+{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280},
+{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875},
+{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603},
+{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339},
+{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386},
+{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674},
+{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010},
+{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564},
+{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860},
+{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118},
+{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768},
+{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839},
+{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595},
+{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001},
+{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837},
+{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918},
+{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354},
+{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119},
+{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555},
+{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188},
+{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928},
+{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484},
+{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706},
+{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252},
+{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149},
+{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287},
+{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850},
+{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059},
+{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202},
+{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458},
+{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823},
+{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542},
+{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
+{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
+{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
+{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
+{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
+{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
+{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
+{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
+{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
+{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
+{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
+{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
+{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
+{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
+{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
+{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
+{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
+{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
+{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
+{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
+{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
+{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
+{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
+{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
+{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496},
+{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855},
+{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883},
+{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330},
+{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268},
+{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035},
+{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910},
+{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276},
+{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785},
+{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298},
+{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465},
+{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435},
+{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248},
+{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368},
+{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912},
+{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536},
+{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647},
+{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493},
+{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827},
+{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413},
+{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096},
+{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084},
+{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406},
+{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881},
+{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895},
+{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257},
+{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506},
+{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107},
+{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239},
+{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
+{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
+{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
+{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
+{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
+{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
+{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
+{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
+{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
+{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
+{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
+{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
+{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
+{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
+{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
+{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
+{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
+{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
+{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
+};
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = {
+120,
+(ne10_int32_t *)ne10_factors_480,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_480,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120],
+};
+static const arch_fft_state cfg_arch_480 = {
+1,
+(void *)&ne10_fft_state_int32_t_480,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = {
+60,
+(ne10_int32_t *)ne10_factors_240,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_240,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60],
+};
+static const arch_fft_state cfg_arch_240 = {
+1,
+(void *)&ne10_fft_state_int32_t_240,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = {
+30,
+(ne10_int32_t *)ne10_factors_120,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_120,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30],
+};
+static const arch_fft_state cfg_arch_120 = {
+1,
+(void *)&ne10_fft_state_int32_t_120,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = {
+15,
+(ne10_int32_t *)ne10_factors_60,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_60,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15],
+};
+static const arch_fft_state cfg_arch_60 = {
+1,
+(void *)&ne10_fft_state_int32_t_60,
+};
+
+#endif  /* end NE10_FFT_PARAMS48000_960 */
diff --git a/media/libopus/celt/static_modes_float.h b/media/libopus/celt/static_modes_float.h
index 5d7e7b8e68..e102a38391 100644
--- a/media/libopus/celt/static_modes_float.h
+++ b/media/libopus/celt/static_modes_float.h
@@ -4,6 +4,11 @@
 #include "modes.h"
 #include "rate.h"
 
+#ifdef HAVE_ARM_NE10
+#define OVERRIDE_FFT 1
+#include "static_modes_float_arm_ne10.h"
+#endif
+
 #ifndef DEF_WINDOW120
 #define DEF_WINDOW120
 static const opus_val16 window120[120] = {
@@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
 #ifndef FFT_BITREV480
 #define FFT_BITREV480
 static const opus_int16 fft_bitrev480[480] = {
-0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
-450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
-345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
-215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
-110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
-430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
-325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
-181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
-76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
-396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
-291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
-161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
-56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
-362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
-257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
-127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
-22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
-472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
-342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
-237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
-93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
-438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
-308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
-203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
-73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
-418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
-274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
-169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
-39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
-384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
-254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
-149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
+8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
+16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
+24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
+4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
+12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
+20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
+28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
+1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
+9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
+17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
+25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
+5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
+13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
+21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
+29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
+2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
+10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
+18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
+26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
+6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
+14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
+22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
+30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
+3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
+11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
+19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
+27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
+7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
+15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
+23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
+31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
 };
 #endif
 
 #ifndef FFT_BITREV240
 #define FFT_BITREV240
 static const opus_int16 fft_bitrev240[240] = {
-0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
-225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
-170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
-115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
-46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
-216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
-161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
-92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
-37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
-207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
-138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
-83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
-28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
-184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
-129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
-74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
+4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
+8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
+12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
+1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
+5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
+9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
+13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
+2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
+6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
+10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
+14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
+3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
+7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
+11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
+15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
 };
 #endif
 
 #ifndef FFT_BITREV120
 #define FFT_BITREV120
 static const opus_int16 fft_bitrev120[120] = {
-0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
-110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
-76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
-56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
-22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
-93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
-73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
-39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
+4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
+1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
+5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
+2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
+6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
+3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
+7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
 };
 #endif
 
 #ifndef FFT_BITREV60
 #define FFT_BITREV60
 static const opus_int16 fft_bitrev60[60] = {
-0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
-46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
-37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
-28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
+1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
+2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
+3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
 };
 #endif
 
@@ -428,9 +433,14 @@ static const kiss_fft_state fft_state48000_960_0 = {
 480,    /* nfft */
 0.002083333f,   /* scale */
 -1,     /* shift */
-{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, },   /* factors */
 fft_bitrev480,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_480,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -440,9 +450,14 @@ static const kiss_fft_state fft_state48000_960_1 = {
 240,    /* nfft */
 0.004166667f,   /* scale */
 1,      /* shift */
-{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev240,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_240,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -452,9 +467,14 @@ static const kiss_fft_state fft_state48000_960_2 = {
 120,    /* nfft */
 0.008333333f,   /* scale */
 2,      /* shift */
-{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev120,  /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_120,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -464,9 +484,14 @@ static const kiss_fft_state fft_state48000_960_3 = {
 60,     /* nfft */
 0.016666667f,   /* scale */
 3,      /* shift */
-{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
 fft_bitrev60,   /* bitrev */
 fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_60,
+#else
+NULL,
+#endif
 };
 #endif
 
@@ -474,104 +499,368 @@ fft_twiddles48000_960,  /* bitrev */
 
 #ifndef MDCT_TWIDDLES960
 #define MDCT_TWIDDLES960
-static const opus_val16 mdct_twiddles960[481] = {
-1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
-0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
-0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
-0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
-0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
-0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
-0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
-0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
-0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
-0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
-0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
-0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
-0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
-0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
-0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
-0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
-0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
-0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
-0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
-0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
-0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
-0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
-0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
-0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
-0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
-0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
-0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
-0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
-0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
-0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
-0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
-0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
-0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
-0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
-0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
-0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
-0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
-0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
-0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
-0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
-0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
-0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
-0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
-0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
-0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
-0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
-0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
-0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
-0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
-0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
-0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
-0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
-0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
-0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
-0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
-0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
-0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
-0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
-0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
-0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
-0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
-0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
-0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
-0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
-0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
-0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
-0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
-0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
-0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
-0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
-0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
-0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
-0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
-0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
-0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
-0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
-0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
-0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
-0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
-0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
-0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
-0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
-0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
-0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
-0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
-0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
-0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
-0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
-0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
-0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
-0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
-0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
-0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
-0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
-0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
-0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
--4.3711390e-08f, };
+static const opus_val16 mdct_twiddles960[1800] = {
+0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f,
+0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f,
+0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f,
+0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f,
+0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f,
+0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f,
+0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f,
+0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f,
+0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f,
+0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f,
+0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f,
+0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f,
+0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f,
+0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f,
+0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f,
+0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f,
+0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f,
+0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f,
+0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f,
+0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f,
+0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f,
+0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f,
+0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f,
+0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f,
+0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f,
+0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f,
+0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f,
+0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f,
+0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f,
+0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f,
+0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f,
+0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f,
+0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f,
+0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f,
+0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f,
+0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f,
+0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f,
+0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f,
+0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f,
+0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f,
+0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f,
+0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f,
+0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f,
+0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f,
+0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f,
+0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f,
+0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f,
+0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f,
+0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f,
+0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f,
+0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f,
+0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f,
+0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f,
+0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f,
+0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f,
+0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f,
+0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f,
+0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f,
+0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f,
+0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f,
+0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f,
+0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f,
+0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f,
+0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f,
+0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f,
+0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f,
+0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f,
+0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f,
+0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f,
+0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f,
+0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f,
+0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f,
+0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f,
+0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f,
+0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f,
+0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f,
+0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f,
+0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f,
+0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f,
+0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f,
+0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f,
+0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f,
+0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f,
+0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f,
+0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f,
+0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f,
+0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f,
+0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f,
+0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f,
+0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f,
+0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f,
+0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f,
+0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f,
+0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f,
+0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f,
+0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f,
+-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f,
+-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f,
+-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f,
+-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f,
+-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f,
+-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f,
+-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f,
+-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f,
+-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f,
+-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f,
+-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f,
+-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f,
+-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f,
+-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f,
+-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f,
+-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f,
+-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f,
+-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f,
+-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f,
+-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f,
+-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f,
+-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f,
+-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f,
+-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f,
+-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f,
+-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f,
+-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f,
+-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f,
+-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f,
+-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f,
+-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f,
+-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f,
+-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f,
+-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f,
+-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f,
+-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f,
+-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f,
+-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f,
+-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f,
+-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f,
+-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f,
+-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f,
+-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f,
+-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f,
+-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f,
+-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f,
+-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f,
+-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f,
+-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f,
+-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f,
+-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f,
+-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f,
+-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f,
+-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f,
+-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f,
+-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f,
+-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f,
+-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f,
+-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f,
+-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f,
+-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f,
+-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f,
+-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f,
+-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f,
+-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f,
+-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f,
+-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f,
+-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f,
+-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f,
+-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f,
+-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f,
+-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f,
+-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f,
+-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f,
+-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f,
+-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f,
+-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f,
+-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f,
+-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f,
+-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f,
+-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f,
+-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f,
+-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f,
+-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f,
+-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f,
+-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f,
+-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f,
+-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f,
+-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f,
+-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f,
+-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f,
+-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f,
+-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f,
+-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f,
+-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f,
+-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f,
+0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f,
+0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f,
+0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f,
+0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f,
+0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f,
+0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f,
+0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f,
+0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f,
+0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f,
+0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f,
+0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f,
+0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f,
+0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f,
+0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f,
+0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f,
+0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f,
+0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f,
+0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f,
+0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f,
+0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f,
+0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f,
+0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f,
+0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f,
+0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f,
+0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f,
+0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f,
+0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f,
+0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f,
+0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f,
+0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f,
+0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f,
+0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f,
+0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f,
+0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f,
+0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f,
+0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f,
+0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f,
+0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f,
+0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f,
+0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f,
+0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f,
+0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f,
+0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f,
+0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f,
+0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f,
+0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f,
+0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f,
+0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f,
+-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f,
+-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f,
+-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f,
+-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f,
+-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f,
+-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f,
+-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f,
+-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f,
+-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f,
+-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f,
+-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f,
+-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f,
+-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f,
+-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f,
+-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f,
+-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f,
+-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f,
+-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f,
+-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f,
+-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f,
+-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f,
+-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f,
+-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f,
+-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f,
+-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f,
+-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f,
+-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f,
+-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f,
+-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f,
+-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f,
+-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f,
+-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f,
+-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f,
+-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f,
+-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f,
+-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f,
+-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f,
+-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f,
+-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f,
+-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f,
+-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f,
+-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f,
+-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f,
+-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f,
+-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f,
+-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f,
+-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f,
+-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f,
+0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f,
+0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f,
+0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f,
+0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f,
+0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f,
+0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f,
+0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f,
+0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f,
+0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f,
+0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f,
+0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f,
+0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f,
+0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f,
+0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f,
+0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f,
+0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f,
+0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f,
+0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f,
+0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f,
+0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f,
+0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f,
+0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f,
+0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f,
+0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f,
+-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f,
+-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f,
+-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f,
+-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f,
+-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f,
+-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f,
+-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f,
+-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f,
+-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f,
+-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f,
+-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f,
+-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f,
+-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f,
+-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f,
+-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f,
+-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f,
+-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f,
+-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f,
+-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f,
+-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f,
+-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f,
+-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f,
+-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f,
+-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f,
+0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f,
+0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f,
+0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f,
+0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f,
+0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f,
+0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f,
+0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f,
+0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f,
+0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f,
+0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f,
+0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f,
+0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f,
+-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f,
+-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f,
+-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f,
+-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f,
+-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f,
+-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f,
+-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f,
+-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f,
+-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f,
+-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f,
+-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f,
+-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f,
+};
 #endif
 
 static const CELTMode mode48000_960_120 = {
diff --git a/media/libopus/celt/static_modes_float_arm_ne10.h b/media/libopus/celt/static_modes_float_arm_ne10.h
new file mode 100644
index 0000000000..934a82a420
--- /dev/null
+++ b/media/libopus/celt/static_modes_float_arm_ne10.h
@@ -0,0 +1,404 @@
+/* The contents of this file was automatically generated by
+ * dump_mode_arm_ne10.c with arguments: 48000 960
+ * It contains static definitions for some pre-defined modes. */
+#include <NE10_init.h>
+
+#ifndef NE10_FFT_PARAMS48000_960
+#define NE10_FFT_PARAMS48000_960
+static const ne10_int32_t ne10_factors_480[64] = {
+4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_240[64] = {
+3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_120[64] = {
+3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_60[64] = {
+2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
+{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
+{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
+{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
+{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
+{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
+{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
+{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
+{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
+{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
+{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
+{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
+{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
+{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
+{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
+{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
+{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
+{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
+{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
+{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
+{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
+{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
+{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
+{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
+{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
+{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
+{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
+{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
+{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
+{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
+{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
+{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
+{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
+{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
+{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
+{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
+{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
+{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
+{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
+{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
+{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
+{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
+{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
+{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
+{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
+{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
+{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
+{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
+{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
+{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
+{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
+{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
+{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
+{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
+{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
+{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
+{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
+{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
+{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
+{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
+{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
+{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
+{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
+{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
+{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
+{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
+{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
+{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
+{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
+{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
+{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
+{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
+{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
+{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
+{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
+{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
+{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
+{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
+{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
+{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
+{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
+{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
+{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
+{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
+{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
+{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
+{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
+{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
+{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
+{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
+{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
+{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
+{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
+{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
+{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
+{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
+{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
+{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
+{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
+{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
+{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
+{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
+{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
+{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
+{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
+{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
+{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
+{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
+{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
+{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
+{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
+{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
+{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
+{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
+{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
+{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
+{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
+{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
+{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
+{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
+{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
+{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
+{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
+{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
+{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
+{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
+{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
+{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+};
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = {
+120,
+(ne10_int32_t *)ne10_factors_480,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_480 = {
+1,
+(void *)&ne10_fft_state_float32_t_480,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = {
+60,
+(ne10_int32_t *)ne10_factors_240,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_240 = {
+1,
+(void *)&ne10_fft_state_float32_t_240,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = {
+30,
+(ne10_int32_t *)ne10_factors_120,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_120 = {
+1,
+(void *)&ne10_fft_state_float32_t_120,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = {
+15,
+(ne10_int32_t *)ne10_factors_60,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_60 = {
+1,
+(void *)&ne10_fft_state_float32_t_60,
+};
+
+#endif  /* end NE10_FFT_PARAMS48000_960 */
diff --git a/media/libopus/celt/vq.c b/media/libopus/celt/vq.c
index 98a0f36c9f..f358396065 100644
--- a/media/libopus/celt/vq.c
+++ b/media/libopus/celt/vq.c
@@ -37,19 +37,23 @@
 #include "os_support.h"
 #include "bands.h"
 #include "rate.h"
+#include "pitch.h"
 
+#ifndef OVERRIDE_vq_exp_rotation1
 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
 {
    int i;
+   opus_val16 ms;
    celt_norm *Xptr;
    Xptr = X;
+   ms = NEG16(s);
    for (i=0;i<len-stride;i++)
    {
       celt_norm x1, x2;
       x1 = Xptr[0];
       x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
-      *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
    }
    Xptr = &X[len-2*stride-1];
    for (i=len-2*stride-1;i>=0;i--)
@@ -57,10 +61,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
       celt_norm x1, x2;
       x1 = Xptr[0];
       x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
-      *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
    }
 }
+#endif /* OVERRIDE_vq_exp_rotation1 */
 
 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
 {
@@ -91,7 +96,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
    }
    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
       extract_collapse_mask().*/
-   len /= stride;
+   len = celt_udiv(len, stride);
    for (i=0;i<stride;i++)
    {
       if (dir < 0)
@@ -140,13 +145,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
       return 1;
    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
       exp_rotation().*/
-   N0 = N/B;
+   N0 = celt_udiv(N, B);
    collapse_mask = 0;
    i=0; do {
       int j;
+      unsigned tmp=0;
       j=0; do {
-         collapse_mask |= (iy[i*N0+j]!=0)<<i;
+         tmp |= iy[i*N0+j];
       } while (++j<N0);
+      collapse_mask |= (tmp!=0)<<i;
    } while (++i<B);
    return collapse_mask;
 }
@@ -322,7 +329,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
       ec_dec *dec, opus_val16 gain)
 {
-   int i;
    opus_val32 Ryy;
    unsigned collapse_mask;
    VARDECL(int, iy);
@@ -331,12 +337,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
    celt_assert2(K>0, "alg_unquant() needs at least one pulse");
    celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
    ALLOC(iy, N, int);
-   decode_pulses(iy, N, K, dec);
-   Ryy = 0;
-   i=0;
-   do {
-      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
-   } while (++i < N);
+   Ryy = decode_pulses(iy, N, K, dec);
    normalise_residual(iy, X, N, Ryy, gain);
    exp_rotation(X, N, -1, B, K, spread);
    collapse_mask = extract_collapse_mask(iy, N, B);
@@ -344,21 +345,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
    return collapse_mask;
 }
 
-void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+#ifndef OVERRIDE_renormalise_vector
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
 {
    int i;
 #ifdef FIXED_POINT
    int k;
 #endif
-   opus_val32 E = EPSILON;
+   opus_val32 E;
    opus_val16 g;
    opus_val32 t;
-   celt_norm *xptr = X;
-   for (i=0;i<N;i++)
-   {
-      E = MAC16_16(E, *xptr, *xptr);
-      xptr++;
-   }
+   celt_norm *xptr;
+   E = EPSILON + celt_inner_prod(X, X, N, arch);
 #ifdef FIXED_POINT
    k = celt_ilog2(E)>>1;
 #endif
@@ -373,8 +371,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
    }
    /*return celt_sqrt(E);*/
 }
+#endif /* OVERRIDE_renormalise_vector */
 
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
 {
    int i;
    int itheta;
@@ -393,14 +392,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
          Eside = MAC16_16(Eside, s, s);
       }
    } else {
-      for (i=0;i<N;i++)
-      {
-         celt_norm m, s;
-         m = X[i];
-         s = Y[i];
-         Emid = MAC16_16(Emid, m, m);
-         Eside = MAC16_16(Eside, s, s);
-      }
+      Emid += celt_inner_prod(X, X, N, arch);
+      Eside += celt_inner_prod(Y, Y, N, arch);
    }
    mid = celt_sqrt(Emid);
    side = celt_sqrt(Eside);
diff --git a/media/libopus/celt/vq.h b/media/libopus/celt/vq.h
index ffdc69cdc4..5cfcbe50ea 100644
--- a/media/libopus/celt/vq.h
+++ b/media/libopus/celt/vq.h
@@ -37,6 +37,11 @@
 #include "entdec.h"
 #include "modes.h"
 
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+
+
 /** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
   * the pitch and a combination of pulses such that its norm is still equal
   * to 1. This is the function that will typically require the most CPU.
@@ -63,8 +68,8 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
       ec_dec *dec, opus_val16 gain);
 
-void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
 
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
 
 #endif /* VQ_H */
diff --git a/media/libopus/celt/x86/celt_lpc_sse.c b/media/libopus/celt/x86/celt_lpc_sse.c
new file mode 100644
index 0000000000..67e5592acf
--- /dev/null
+++ b/media/libopus/celt/x86/celt_lpc_sse.c
@@ -0,0 +1,132 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+#include "x86cpu.h"
+
+#if defined(FIXED_POINT)
+
+void celt_fir_sse4_1(const opus_val16 *_x,
+         const opus_val16 *num,
+         opus_val16 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem,
+         int arch)
+{
+    int i,j;
+    VARDECL(opus_val16, rnum);
+    VARDECL(opus_val16, x);
+
+    __m128i vecNoA;
+    opus_int32 noA ;
+    SAVE_STACK;
+
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+
+   for (i=0;i<N-7;i+=8)
+   {
+       x[i+ord  ]=_x[i  ];
+       x[i+ord+1]=_x[i+1];
+       x[i+ord+2]=_x[i+2];
+       x[i+ord+3]=_x[i+3];
+       x[i+ord+4]=_x[i+4];
+       x[i+ord+5]=_x[i+5];
+       x[i+ord+6]=_x[i+6];
+       x[i+ord+7]=_x[i+7];
+   }
+
+   for (;i<N-3;i+=4)
+   {
+       x[i+ord  ]=_x[i  ];
+       x[i+ord+1]=_x[i+1];
+       x[i+ord+2]=_x[i+2];
+       x[i+ord+3]=_x[i+3];
+   }
+
+   for (;i<N;i++)
+         x[i+ord]=_x[i];
+
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
+      for (j=0;j<ord;j++)
+      {
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      }
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
+   }
+#else
+   noA = EXTEND32(1) << SIG_SHIFT >> 1;
+   vecNoA = _mm_set_epi32(noA, noA, noA, noA);
+
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sums[4] = {0};
+      __m128i vecSum, vecX;
+
+      xcorr_kernel(rnum, x+i, sums, ord, arch);
+
+      vecSum = _mm_loadu_si128((__m128i *)sums);
+      vecSum = _mm_add_epi32(vecSum, vecNoA);
+      vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
+      vecX = OP_CVTEPI16_EPI32_M64(_x + i);
+      vecSum = _mm_add_epi32(vecSum, vecX);
+      vecSum = _mm_packs_epi32(vecSum, vecSum);
+      _mm_storel_epi64((__m128i *)(_y + i), vecSum);
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum, rnum[j], x[i + j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+
+#endif
+   RESTORE_STACK;
+}
+
+#endif
diff --git a/media/libopus/celt/x86/celt_lpc_sse.h b/media/libopus/celt/x86/celt_lpc_sse.h
new file mode 100644
index 0000000000..c5ec796ed5
--- /dev/null
+++ b/media/libopus/celt/x86/celt_lpc_sse.h
@@ -0,0 +1,68 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_LPC_SSE_H
+#define CELT_LPC_SSE_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_CELT_FIR
+
+void celt_fir_sse4_1(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         opus_val16 *mem,
+         int arch);
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+#define celt_fir(x, num, y, N, ord, mem, arch) \
+    ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
+
+#else
+
+extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         opus_val16 *mem,
+         int arch);
+
+#  define celt_fir(x, num, y, N, ord, mem, arch) \
+    ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
+
+#endif
+#endif
+
+#endif
diff --git a/media/libopus/celt/x86/pitch_sse.c b/media/libopus/celt/x86/pitch_sse.c
new file mode 100644
index 0000000000..20e73126b6
--- /dev/null
+++ b/media/libopus/celt/x86/pitch_sse.c
@@ -0,0 +1,185 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
+
+#include <xmmintrin.h>
+#include "arch.h"
+
+void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+{
+   int j;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_loadu_ps(sum);
+   xsum2 = _mm_setzero_ps();
+
+   for (j = 0; j < len-3; j += 4)
+   {
+      __m128 x0 = _mm_loadu_ps(x+j);
+      __m128 yj = _mm_loadu_ps(y+j);
+      __m128 y3 = _mm_loadu_ps(y+j+3);
+
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
+                                          _mm_shuffle_ps(yj,y3,0x49)));
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
+                                          _mm_shuffle_ps(yj,y3,0x9e)));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
+   }
+   if (j < len)
+   {
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+      if (++j < len)
+      {
+         xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         if (++j < len)
+         {
+            xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         }
+      }
+   }
+   _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+}
+
+
+void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_setzero_ps();
+   xsum2 = _mm_setzero_ps();
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 y1i = _mm_loadu_ps(y01+i);
+      __m128 y2i = _mm_loadu_ps(y02+i);
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
+   }
+   /* Horizontal sum */
+   xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
+   xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
+   _mm_store_ss(xy1, xsum1);
+   xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
+   xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
+   _mm_store_ss(xy2, xsum2);
+   for (;i<N;i++)
+   {
+      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
+      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
+   }
+}
+
+opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+   int i;
+   float xy;
+   __m128 sum;
+   sum = _mm_setzero_ps();
+   /* FIXME: We should probably go 8-way and use 2 sums. */
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 yi = _mm_loadu_ps(y+i);
+      sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
+   }
+   /* Horizontal sum */
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&xy, sum);
+   for (;i<N;i++)
+   {
+      xy = MAC16_16(xy, x[i], y[i]);
+   }
+   return xy;
+}
+
+void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   int i;
+   __m128 x0v;
+   __m128 g10v, g11v, g12v;
+   g10v = _mm_load1_ps(&g10);
+   g11v = _mm_load1_ps(&g11);
+   g12v = _mm_load1_ps(&g12);
+   x0v = _mm_loadu_ps(&x[-T-2]);
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 yi, yi2, x1v, x2v, x3v, x4v;
+      const opus_val32 *xp = &x[i-T-2];
+      yi = _mm_loadu_ps(x+i);
+      x4v = _mm_loadu_ps(xp+4);
+#if 0
+      /* Slower version with all loads */
+      x1v = _mm_loadu_ps(xp+1);
+      x2v = _mm_loadu_ps(xp+2);
+      x3v = _mm_loadu_ps(xp+3);
+#else
+      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
+      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
+      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
+#endif
+
+      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
+#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
+      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
+      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#else
+      /* Use partial sums */
+      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
+                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+      yi = _mm_add_ps(yi, yi2);
+#endif
+      x0v=x4v;
+      _mm_storeu_ps(y+i, yi);
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x[i-T])
+               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
+   }
+#endif
+}
+
+
+#endif
diff --git a/media/libopus/celt/x86/pitch_sse.h b/media/libopus/celt/x86/pitch_sse.h
index 695122a5ad..d4cbeb8b9c 100644
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
+/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
+   Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
 /**
    @file pitch_sse.h
    @brief Pitch analysis
@@ -32,125 +33,160 @@
 #ifndef PITCH_SSE_H
 #define PITCH_SSE_H
 
-#include <xmmintrin.h>
-#include "arch.h"
+#if defined(HAVE_CONFIG_H)
+#include "config.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+void xcorr_kernel_sse4_1(
+                    const opus_int16 *x,
+                    const opus_int16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
+void xcorr_kernel_sse(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#endif
+
+#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
+
+#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)arch, xcorr_kernel_sse(x, y, sum, len))
+
+#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
 
 #define OVERRIDE_XCORR_KERNEL
-static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
-{
-   int j;
-   __m128 xsum1, xsum2;
-   xsum1 = _mm_loadu_ps(sum);
-   xsum2 = _mm_setzero_ps();
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
 
-   for (j = 0; j < len-3; j += 4)
-   {
-      __m128 x0 = _mm_loadu_ps(x+j);
-      __m128 yj = _mm_loadu_ps(y+j);
-      __m128 y3 = _mm_loadu_ps(y+j+3);
+#endif
 
-      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
-      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
-                                          _mm_shuffle_ps(yj,y3,0x49)));
-      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
-                                          _mm_shuffle_ps(yj,y3,0x9e)));
-      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
-   }
-   if (j < len)
-   {
-      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
-      if (++j < len)
-      {
-         xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
-         if (++j < len)
-         {
-            xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
-         }
-      }
-   }
-   _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
-}
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse4_1(
+    const opus_int16 *x,
+    const opus_int16 *y,
+    int               N);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse2(
+    const opus_int16 *x,
+    const opus_int16 *y,
+    int               N);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse(
+    const opus_val16 *x,
+    const opus_val16 *y,
+    int               N);
+#endif
+
+
+#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+	((void)arch, celt_inner_prod_sse4_1(x, y, N))
+
+#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+	((void)arch, celt_inner_prod_sse2(x, y, N))
+
+#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+	((void)arch, celt_inner_prod_sse(x, y, N))
+
+
+#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
+	(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+
+extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    int               N);
+
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+    ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
+
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
 
 #define OVERRIDE_DUAL_INNER_PROD
-static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
-      int N, opus_val32 *xy1, opus_val32 *xy2)
-{
-   int i;
-   __m128 xsum1, xsum2;
-   xsum1 = _mm_setzero_ps();
-   xsum2 = _mm_setzero_ps();
-   for (i=0;i<N-3;i+=4)
-   {
-      __m128 xi = _mm_loadu_ps(x+i);
-      __m128 y1i = _mm_loadu_ps(y01+i);
-      __m128 y2i = _mm_loadu_ps(y02+i);
-      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
-      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
-   }
-   /* Horizontal sum */
-   xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
-   xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
-   _mm_store_ss(xy1, xsum1);
-   xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
-   xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
-   _mm_store_ss(xy2, xsum2);
-   for (;i<N;i++)
-   {
-      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
-      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
-   }
-}
-
 #define OVERRIDE_COMB_FILTER_CONST
-static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
-      opus_val16 g10, opus_val16 g11, opus_val16 g12)
-{
-   int i;
-   __m128 x0v;
-   __m128 g10v, g11v, g12v;
-   g10v = _mm_load1_ps(&g10);
-   g11v = _mm_load1_ps(&g11);
-   g12v = _mm_load1_ps(&g12);
-   x0v = _mm_loadu_ps(&x[-T-2]);
-   for (i=0;i<N-3;i+=4)
-   {
-      __m128 yi, yi2, x1v, x2v, x3v, x4v;
-      const opus_val32 *xp = &x[i-T-2];
-      yi = _mm_loadu_ps(x+i);
-      x4v = _mm_loadu_ps(xp+4);
-#if 0
-      /* Slower version with all loads */
-      x1v = _mm_loadu_ps(xp+1);
-      x2v = _mm_loadu_ps(xp+2);
-      x3v = _mm_loadu_ps(xp+3);
-#else
-      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
-      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
-      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
-#endif
 
-      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
-#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
-      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
-      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#undef dual_inner_prod
+#undef comb_filter_const
+
+void dual_inner_prod_sse(const opus_val16 *x,
+	const opus_val16 *y01,
+	const opus_val16 *y02,
+	int               N,
+	opus_val32       *xy1,
+	opus_val32       *xy2);
+
+void comb_filter_const_sse(opus_val32 *y,
+	opus_val32 *x,
+	int         T,
+	int         N,
+	opus_val16  g10,
+	opus_val16  g11,
+	opus_val16  g12);
+
+
+#if defined(OPUS_X86_PRESUME_SSE)
+# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
+    ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
+
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+    ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
 #else
-      /* Use partial sums */
-      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
-                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
-      yi = _mm_add_ps(yi, yi2);
-#endif
-      x0v=x4v;
-      _mm_storeu_ps(y+i, yi);
-   }
-#ifdef CUSTOM_MODES
-   for (;i<N;i++)
-   {
-      y[i] = x[i]
-               + MULT16_32_Q15(g10,x[i-T])
-               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
-               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
-   }
-#endif
-}
+
+extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+              const opus_val16 *x,
+              const opus_val16 *y01,
+              const opus_val16 *y02,
+              int               N,
+              opus_val32       *xy1,
+              opus_val32       *xy2);
+
+#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch)			\
+    ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
+
+extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
+              opus_val32 *y,
+              opus_val32 *x,
+              int         T,
+              int         N,
+              opus_val16  g10,
+              opus_val16  g11,
+              opus_val16  g12);
+
+#define comb_filter_const(y, x, T, N, g10, g11, g12, arch)				\
+    ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
+
+#define NON_STATIC_COMB_FILTER_CONST_C
+
+#endif
+#endif
 
 #endif
diff --git a/media/libopus/celt/x86/pitch_sse2.c b/media/libopus/celt/x86/pitch_sse2.c
new file mode 100644
index 0000000000..a0e7d1beaf
--- /dev/null
+++ b/media/libopus/celt/x86/pitch_sse2.c
@@ -0,0 +1,95 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+    opus_int  i, dataSize16;
+    opus_int32 sum;
+
+    __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
+    __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
+
+    sum = 0;
+    dataSize16 = N & ~15;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for (i=0;i<dataSize16;i+=16)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
+        inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+        inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
+    }
+
+    acc1 = _mm_add_epi32( acc1, acc2 );
+
+    if (N - i >= 8)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        i += 8;
+    }
+
+    acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
+    acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
+    sum += _mm_cvtsi128_si32(acc1);
+
+    for (;i<N;i++) {
+        sum = silk_SMLABB(sum, x[i], y[i]);
+    }
+
+    return sum;
+}
+#endif
diff --git a/media/libopus/celt/x86/pitch_sse4_1.c b/media/libopus/celt/x86/pitch_sse4_1.c
new file mode 100644
index 0000000000..a092c68b24
--- /dev/null
+++ b/media/libopus/celt/x86/pitch_sse4_1.c
@@ -0,0 +1,195 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+#include <smmintrin.h>
+#include "x86cpu.h"
+
+opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+    opus_int  i, dataSize16;
+    opus_int32 sum;
+    __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
+    __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
+    __m128i inVec1_3210, inVec2_3210;
+
+    sum = 0;
+    dataSize16 = N & ~15;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for (i=0;i<dataSize16;i+=16) {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
+        inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+        inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
+    }
+
+    acc1 = _mm_add_epi32(acc1, acc2);
+
+    if (N - i >= 8)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        i += 8;
+    }
+
+    if (N - i >= 4)
+    {
+        inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
+        inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
+
+        inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_3210);
+        i += 4;
+    }
+
+    acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
+    acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
+
+    sum += _mm_cvtsi128_si32(acc1);
+
+    for (;i<N;i++)
+    {
+        sum = silk_SMLABB(sum, x[i], y[i]);
+    }
+
+    return sum;
+}
+
+void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
+{
+    int j;
+
+    __m128i vecX, vecX0, vecX1, vecX2, vecX3;
+    __m128i vecY0, vecY1, vecY2, vecY3;
+    __m128i sum0, sum1, sum2, sum3, vecSum;
+    __m128i initSum;
+
+    celt_assert(len >= 3);
+
+    sum0 = _mm_setzero_si128();
+    sum1 = _mm_setzero_si128();
+    sum2 = _mm_setzero_si128();
+    sum3 = _mm_setzero_si128();
+
+    for (j=0;j<(len-7);j+=8)
+    {
+        vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
+        vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
+        vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
+        vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
+        vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));
+
+        sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
+        sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
+        sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
+        sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
+    }
+
+    sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
+    sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));
+
+    sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
+    sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));
+
+    sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
+    sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));
+
+    sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
+    sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));
+
+    vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
+          _mm_unpacklo_epi32(sum2, sum3));
+
+    for (;j<(len-3);j+=4)
+    {
+        vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
+        vecX0 = _mm_shuffle_epi32(vecX, 0x00);
+        vecX1 = _mm_shuffle_epi32(vecX, 0x55);
+        vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
+        vecX3 = _mm_shuffle_epi32(vecX, 0xff);
+
+        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
+        vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
+        vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
+        vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);
+
+        sum0 = _mm_mullo_epi32(vecX0, vecY0);
+        sum1 = _mm_mullo_epi32(vecX1, vecY1);
+        sum2 = _mm_mullo_epi32(vecX2, vecY2);
+        sum3 = _mm_mullo_epi32(vecX3, vecY3);
+
+        sum0 = _mm_add_epi32(sum0, sum1);
+        sum2 = _mm_add_epi32(sum2, sum3);
+        vecSum = _mm_add_epi32(vecSum, sum0);
+        vecSum = _mm_add_epi32(vecSum, sum2);
+    }
+
+    for (;j<len;j++)
+    {
+        vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
+        vecX0 = _mm_shuffle_epi32(vecX, 0x00);
+
+        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
+
+        sum0 = _mm_mullo_epi32(vecX0, vecY0);
+        vecSum = _mm_add_epi32(vecSum, sum0);
+    }
+
+    initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
+    initSum = _mm_add_epi32(initSum, vecSum);
+    _mm_storeu_si128((__m128i *)sum, initSum);
+}
+#endif
diff --git a/media/libopus/celt/x86/x86_celt_map.c b/media/libopus/celt/x86/x86_celt_map.c
new file mode 100644
index 0000000000..8e5e449275
--- /dev/null
+++ b/media/libopus/celt/x86/x86_celt_map.c
@@ -0,0 +1,155 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+#include "config.h"
+#endif
+
+#include "x86/x86cpu.h"
+#include "celt_lpc.h"
+#include "pitch.h"
+#include "pitch_sse.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(FIXED_POINT)
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
+
+void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16       *y,
+         int              N,
+         int              ord,
+         opus_val16       *mem,
+         int              arch
+) = {
+  celt_fir_c,                /* non-sse */
+  celt_fir_c,
+  celt_fir_c,
+  MAY_HAVE_SSE4_1(celt_fir), /* sse4.1  */
+  MAY_HAVE_SSE4_1(celt_fir)  /* avx  */
+};
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* non-sse */
+  xcorr_kernel_c,
+  xcorr_kernel_c,
+  MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
+  MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
+};
+
+#endif
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
+	(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
+
+opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         int              N
+) = {
+  celt_inner_prod_c,                /* non-sse */
+  celt_inner_prod_c,
+  MAY_HAVE_SSE2(celt_inner_prod),
+  MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1  */
+  MAY_HAVE_SSE4_1(celt_inner_prod)  /* avx  */
+};
+
+#endif
+
+# else
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* non-sse */
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel)
+};
+
+opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         int              N
+) = {
+  celt_inner_prod_c,                /* non-sse */
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod)
+};
+
+void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y01,
+                    const opus_val16 *y02,
+                    int               N,
+                    opus_val32       *xy1,
+                    opus_val32       *xy2
+) = {
+  dual_inner_prod_c,                /* non-sse */
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod)
+};
+
+void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
+              opus_val32 *y,
+              opus_val32 *x,
+              int         T,
+              int         N,
+              opus_val16  g10,
+              opus_val16  g11,
+              opus_val16  g12
+) = {
+  comb_filter_const_c,                /* non-sse */
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const)
+};
+
+
+#endif
+
+#endif
+#endif
diff --git a/media/libopus/celt/x86/x86cpu.c b/media/libopus/celt/x86/x86cpu.c
new file mode 100644
index 0000000000..555a576b8a
--- /dev/null
+++ b/media/libopus/celt/x86/x86cpu.c
@@ -0,0 +1,157 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cpu_support.h"
+#include "macros.h"
+#include "main.h"
+#include "pitch.h"
+#include "x86cpu.h"
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+
+
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
+{
+	__cpuid((int*)CPUInfo, InfoType);
+}
+
+#else
+
+#if defined(CPU_INFO_BY_C)
+#include <cpuid.h>
+#endif
+
+static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
+{
+#if defined(CPU_INFO_BY_ASM)
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
+    __asm__ __volatile__ (
+        "xchg %%ebx, %1\n"
+        "cpuid\n"
+        "xchg %%ebx, %1\n":
+        "=a" (CPUInfo[0]),
+        "=r" (CPUInfo[1]),
+        "=c" (CPUInfo[2]),
+        "=d" (CPUInfo[3]) :
+        "0" (InfoType)
+    );
+#else
+    __asm__ __volatile__ (
+        "cpuid":
+        "=a" (CPUInfo[0]),
+        "=b" (CPUInfo[1]),
+        "=c" (CPUInfo[2]),
+        "=d" (CPUInfo[3]) :
+        "0" (InfoType)
+    );
+#endif
+#elif defined(CPU_INFO_BY_C)
+    __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3]));
+#endif
+}
+
+#endif
+
+typedef struct CPU_Feature{
+    /*  SIMD: 128-bit */
+    int HW_SSE;
+    int HW_SSE2;
+    int HW_SSE41;
+    /*  SIMD: 256-bit */
+    int HW_AVX;
+} CPU_Feature;
+
+static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
+{
+    unsigned int info[4] = {0};
+    unsigned int nIds = 0;
+
+    cpuid(info, 0);
+    nIds = info[0];
+
+    if (nIds >= 1){
+        cpuid(info, 1);
+        cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
+        cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
+        cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
+        cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+    }
+    else {
+        cpu_feature->HW_SSE = 0;
+        cpu_feature->HW_SSE2 = 0;
+        cpu_feature->HW_SSE41 = 0;
+        cpu_feature->HW_AVX = 0;
+    }
+}
+
+int opus_select_arch(void)
+{
+    CPU_Feature cpu_feature;
+    int arch;
+
+    opus_cpu_feature_check(&cpu_feature);
+
+    arch = 0;
+    if (!cpu_feature.HW_SSE)
+    {
+       return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_SSE2)
+    {
+       return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_SSE41)
+    {
+        return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_AVX)
+    {
+        return arch;
+    }
+    arch++;
+
+    return arch;
+}
+
+#endif
diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h
new file mode 100644
index 0000000000..04fd48aac4
--- /dev/null
+++ b/media/libopus/celt/x86/x86cpu.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(X86CPU_H)
+# define X86CPU_H
+
+# if defined(OPUS_X86_MAY_HAVE_SSE)
+#  define MAY_HAVE_SSE(name) name ## _sse
+# else
+#  define MAY_HAVE_SSE(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_SSE2)
+#  define MAY_HAVE_SSE2(name) name ## _sse2
+# else
+#  define MAY_HAVE_SSE2(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
+# else
+#  define MAY_HAVE_SSE4_1(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_AVX)
+#  define MAY_HAVE_AVX(name) name ## _avx
+# else
+#  define MAY_HAVE_AVX(name) name ## _c
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+# endif
+
+/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
+  or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
+  actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
+  reference, these require 16-byte alignment and load a full 16 bytes (instead
+  of 4 or 8), possibly reading out of bounds.
+
+  We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
+  _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
+  reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
+  optimize this out when optimizations ARE enabled.
+
+  Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
+  (which is fair, since technically the compiler is always allowed to do the
+  dereference before invoking the function implementing the intrinsic).
+  However, it is smart enough to eliminate the extra MOVD instruction.
+  For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
+  the extra MOVQ if it's specified explicitly */
+
+# if defined(__clang__) || !defined(__OPTIMIZE__)
+#  define OP_CVTEPI8_EPI32_M32(x) \
+ (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
+# else
+#  define OP_CVTEPI8_EPI32_M32(x) \
+ (_mm_cvtepi8_epi32(*(__m128i *)(x)))
+#endif
+
+# if !defined(__OPTIMIZE__)
+#  define OP_CVTEPI16_EPI32_M64(x) \
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
+# else
+#  define OP_CVTEPI16_EPI32_M64(x) \
+ (_mm_cvtepi16_epi32(*(__m128i *)(x)))
+# endif
+
+#endif
diff --git a/media/libopus/gen-sources.py b/media/libopus/gen-sources.py
index a861607b25..00c180940b 100644
--- a/media/libopus/gen-sources.py
+++ b/media/libopus/gen-sources.py
@@ -26,7 +26,7 @@ def generate_sources_mozbuild(path):
         'silk_sources.mk',
     ]
 
-    var_definition = re.compile('([A-Z_]*) = (.*)')
+    var_definition = re.compile('([A-Z0-9_]*) = (.*)')
     with open('sources.mozbuild', 'w') as output:
 
         output.write('# THIS FILE WAS AUTOMATICALLY GENERATED BY %s. DO NOT EDIT.\n' % sys.argv[0])
diff --git a/media/libopus/include/opus.h b/media/libopus/include/opus.h
index 93a53a2ffc..b0bdf6f2df 100644
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -616,7 +616,10 @@ OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, fl
   * merged. Splitting valid Opus packets is always guaranteed to succeed,
   * whereas merging valid packets only succeeds if all frames have the same
   * mode, bandwidth, and frame size, and when the total duration of the merged
-  * packet is no more than 120 ms.
+  * packet is no more than 120 ms. The 120 ms limit comes from the
+  * specification and limits decoder memory requirements at a point where
+  * framing overhead becomes negligible.
+  *
   * The repacketizer currently only operates on elementary Opus
   * streams. It will not manipualte multistream packets successfully, except in
   * the degenerate case where they consist of data from a single stream.
diff --git a/media/libopus/include/opus_defines.h b/media/libopus/include/opus_defines.h
index 265089f65e..647ed5d6f2 100644
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -46,7 +46,7 @@ extern "C" {
 #define OPUS_OK                0
 /** One or more invalid/out of range arguments @hideinitializer*/
 #define OPUS_BAD_ARG          -1
-/** The mode struct passed is invalid @hideinitializer*/
+/** Not enough bytes allocated in the buffer @hideinitializer*/
 #define OPUS_BUFFER_TOO_SMALL -2
 /** An internal error was detected @hideinitializer*/
 #define OPUS_INTERNAL_ERROR   -3
@@ -274,7 +274,6 @@ extern "C" {
 /** Enables or disables variable bitrate (VBR) in the encoder.
   * The configured bitrate may not be met exactly because frames must
   * be an integer number of bytes in length.
-  * @warning Only the MDCT mode of Opus can provide hard CBR behavior.
   * @see OPUS_GET_VBR
   * @see OPUS_SET_VBR_CONSTRAINT
   * @param[in] x <tt>opus_int32</tt>: Allowed values:
@@ -454,14 +453,6 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
 
-/** Gets the sampling rate the encoder or decoder was initialized with.
-  * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
-  * or opus_decoder_init().
-  * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
-  * @hideinitializer
-  */
-#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
-
 /** Gets the total samples of delay added by the entire codec.
   * This can be queried by the encoder and then the provided number of samples can be
   * skipped on from the start of the decoder's output to provide time aligned input
@@ -498,9 +489,9 @@ extern "C" {
 #define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
 
 /** Configures the encoder's expected packet loss percentage.
-  * Higher values with trigger progressively more loss resistant behavior in the encoder
-  * at the expense of quality at a given bitrate in the lossless case, but greater quality
-  * under loss.
+  * Higher values trigger progressively more loss resistant behavior in the encoder
+  * at the expense of quality at a given bitrate in the absence of packet loss, but
+  * greater quality under loss.
   * @see OPUS_GET_PACKET_LOSS_PERC
   * @param[in] x <tt>opus_int32</tt>:   Loss percentage in the range 0-100, inclusive (default: 0).
   * @hideinitializer */
@@ -532,7 +523,19 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
 /** Configures the depth of signal being encoded.
+  *
   * This is a hint which helps the encoder identify silence and near-silence.
+  * It represents the number of significant bits of linear intensity below
+  * which the signal contains ignorable quantization or other noise.
+  *
+  * For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting
+  * for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate
+  * for 16-bit linear pcm input with opus_encode_float().
+  *
+  * When using opus_encode() instead of opus_encode_float(), or when libopus
+  * is compiled for fixed-point, the encoder uses the minimum of the value
+  * set here and the value 16.
+  *
   * @see OPUS_GET_LSB_DEPTH
   * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
   *                                   (default: 24).
@@ -545,11 +548,6 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
 
-/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
-  * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
-  * @hideinitializer */
-#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
-
 /** Configures the encoder's use of variable duration frames.
   * When variable duration is enabled, the encoder is free to use a shorter frame
   * size than the one requested in the opus_encode*() call.
@@ -558,12 +556,12 @@ extern "C" {
   * packet. The part of the audio that was not encoded needs to be resent to the
   * encoder for the next call. Do not use this option unless you <b>really</b>
   * know what you are doing.
-  * @see OPUS_GET_EXPERT_VARIABLE_DURATION
+  * @see OPUS_GET_EXPERT_FRAME_DURATION
   * @param[in] x <tt>opus_int32</tt>: Allowed values:
   * <dl>
   * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
   * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
-  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
@@ -573,12 +571,12 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
 /** Gets the encoder's configured use of variable duration frames.
-  * @see OPUS_SET_EXPERT_VARIABLE_DURATION
+  * @see OPUS_SET_EXPERT_FRAME_DURATION
   * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
   * <dl>
   * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
   * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
-  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
@@ -589,10 +587,22 @@ extern "C" {
 #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
 
 /** If set to 1, disables almost all use of prediction, making frames almost
-    completely independent. This reduces quality. (default : 0)
+  * completely independent. This reduces quality.
+  * @see OPUS_GET_PREDICTION_DISABLED
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Enable prediction (default).</dd>
+  * <dt>1</dt><dd>Disable prediction.</dd>
+  * </dl>
   * @hideinitializer */
 #define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
 /** Gets the encoder's configured prediction status.
+  * @see OPUS_SET_PREDICTION_DISABLED
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Prediction enabled (default).</dd>
+  * <dt>1</dt><dd>Prediction disabled.</dd>
+  * </dl>
   * @hideinitializer */
 #define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
 
@@ -649,18 +659,6 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
 
-/** Gets the pitch of the last decoded frame, if available.
-  * This can be used for any post-processing algorithm requiring the use of pitch,
-  * e.g. time stretching/shortening. If the last frame was not voiced, or if the
-  * pitch was not coded in the frame, then zero is returned.
-  *
-  * This CTL is only implemented for decoder instances.
-  *
-  * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
-  *
-  * @hideinitializer */
-#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
-
 /** Gets the encoder's configured bandpass or the decoder's last bandpass.
   * @see OPUS_SET_BANDWIDTH
   * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
@@ -675,6 +673,14 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
 
+/** Gets the sampling rate the encoder or decoder was initialized with.
+  * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
+  * or opus_decoder_init().
+  * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
+  * @hideinitializer
+  */
+#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
+
 /**@}*/
 
 /** @defgroup opus_decoderctls Decoder related CTLs
@@ -699,6 +705,23 @@ extern "C" {
   * @hideinitializer */
 #define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
 
+/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
+  * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
+  * @hideinitializer */
+#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the pitch of the last decoded frame, if available.
+  * This can be used for any post-processing algorithm requiring the use of pitch,
+  * e.g. time stretching/shortening. If the last frame was not voiced, or if the
+  * pitch was not coded in the frame, then zero is returned.
+  *
+  * This CTL is only implemented for decoder instances.
+  *
+  * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
+  *
+  * @hideinitializer */
+#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
+
 /**@}*/
 
 /** @defgroup opus_libinfo Opus library information functions
@@ -713,6 +736,10 @@ extern "C" {
 OPUS_EXPORT const char *opus_strerror(int error);
 
 /** Gets the libopus version string.
+  *
+  * Applications may look for the substring "-fixed" in the version string to
+  * determine whether they have a fixed-point or floating-point build at
+  * runtime.
   *
   * @returns Version string
   */
diff --git a/media/libopus/include/opus_multistream.h b/media/libopus/include/opus_multistream.h
index ae5997934a..47e03900bd 100644
--- a/media/libopus/include/opus_multistream.h
+++ b/media/libopus/include/opus_multistream.h
@@ -111,9 +111,9 @@ extern "C" {
   * duration, can be computed without any special negotiation.
   *
   * The format for multistream Opus packets is defined in the
-  * <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
+  * <a href="https://tools.ietf.org/html/draft-ietf-codec-oggopus">Ogg
   * encapsulation specification</a> and is based on the self-delimited Opus
-  * framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+  * framing described in Appendix B of <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
   * Normal Opus packets are just a degenerate case of multistream Opus packets,
   * and can be encoded or decoded with the multistream API by setting
   * <code>streams</code> to <code>1</code> when initializing the encoder or
@@ -140,7 +140,7 @@ extern "C" {
   *
   * The output channels specified by the encoder
   * should use the
-  * <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+  * <a href="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">Vorbis
   * channel ordering</a>. A decoder may wish to apply an additional permutation
   * to the mapping the encoder used to achieve a different output channel
   * order (e.g. for outputing in WAV order).
diff --git a/media/libopus/moz.build b/media/libopus/moz.build
index 90a6fa5927..9133257a93 100644
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -19,7 +19,7 @@ ALLOW_COMPILER_WARNINGS = True
 FINAL_LIBRARY = 'gkmedias'
 
 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.1-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.1.2-mozilla"'
 DEFINES['USE_ALLOCA'] = True
 
 # We only need to export symbols if we're built into libgkmedias
@@ -71,9 +71,9 @@ LOCAL_INCLUDES += [
 include('sources.mozbuild')
 
 UNIFIED_SOURCES += celt_sources
-SOURCES += celt_nonunified_sources
 UNIFIED_SOURCES += silk_sources
 UNIFIED_SOURCES += opus_sources
+SOURCES += opus_nonunified_sources
 
 if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
     LOCAL_INCLUDES += [
diff --git a/media/libopus/nonunified.patch b/media/libopus/nonunified.patch
new file mode 100644
index 0000000000..cf9ff554fb
--- /dev/null
+++ b/media/libopus/nonunified.patch
@@ -0,0 +1,38 @@
+diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild
+index 8a39b9f..dfc2c62 100644
+--- a/media/libopus/sources.mozbuild
++++ b/media/libopus/sources.mozbuild
+@@ -2,8 +2,6 @@
+ celt_sources = [
+     'celt/bands.c',
+     'celt/celt.c',
+-    'celt/celt_decoder.c',
+-    'celt/celt_encoder.c',
+     'celt/celt_lpc.c',
+     'celt/cwrs.c',
+     'celt/entcode.c',
+@@ -20,6 +18,15 @@ celt_sources = [
+     'celt/vq.c',
+ ]
+ 
++opus_nonunified_sources = [
++    # Disabled because of name clash of opus_custom_encoder_get_size.
++    'celt/celt_decoder.c',
++    'celt/celt_encoder.c',
++    # Disabled for (safe) warning about QA redefinition.
++    'silk/LPC_inv_pred_gain.c',
++    'silk/NLSF2A.c',
++]
++
+ celt_sources_sse = [
+     'celt/x86/pitch_sse.c',
+     'celt/x86/x86_celt_map.c',
+@@ -105,8 +112,6 @@ silk_sources = [
+     'silk/log2lin.c',
+     'silk/LP_variable_cutoff.c',
+     'silk/LPC_analysis_filter.c',
+-    'silk/LPC_inv_pred_gain.c',
+-    'silk/NLSF2A.c',
+     'silk/NLSF_decode.c',
+     'silk/NLSF_del_dec_quant.c',
+     'silk/NLSF_encode.c',
diff --git a/media/libopus/silk/A2NLSF.c b/media/libopus/silk/A2NLSF.c
index 74b1b95d6f..b6e9e5ffcc 100644
--- a/media/libopus/silk/A2NLSF.c
+++ b/media/libopus/silk/A2NLSF.c
@@ -71,8 +71,23 @@ static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial ev
 
     y32 = p[ dd ];                                  /* Q16 */
     x_Q16 = silk_LSHIFT( x, 4 );
-    for( n = dd - 1; n >= 0; n-- ) {
-        y32 = silk_SMLAWW( p[ n ], y32, x_Q16 );    /* Q16 */
+
+    if ( opus_likely( 8 == dd ) )
+    {
+        y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 );
+        y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 );
+    }
+    else
+    {
+        for( n = dd - 1; n >= 0; n-- ) {
+            y32 = silk_SMLAWW( p[ n ], y32, x_Q16 );    /* Q16 */
+        }
     }
     return y32;
 }
diff --git a/media/libopus/silk/API.h b/media/libopus/silk/API.h
index f0601bcf6b..0131acbb08 100644
--- a/media/libopus/silk/API.h
+++ b/media/libopus/silk/API.h
@@ -111,7 +111,8 @@ opus_int silk_Decode(                                   /* O    Returns error co
     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
-    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+    opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
+    int                             arch                /* I    Run-time architecture                           */
 );
 
 #if 0
diff --git a/media/libopus/silk/CNG.c b/media/libopus/silk/CNG.c
index 8481d95dbe..61787c2302 100644
--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
+    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
     opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
@@ -55,7 +55,7 @@ static OPUS_INLINE void silk_CNG_exc(
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
     }
     *rand_seed = seed;
 }
@@ -85,7 +85,7 @@ void silk_CNG(
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16;
+    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
@@ -125,11 +125,20 @@ void silk_CNG(
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
         VARDECL( opus_int32, CNG_sig_Q10 );
-
         ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+        gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
+        if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
+            gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
+            gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+            gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
+        } else {
+            gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
+            gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+            gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
+        }
+        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
@@ -162,7 +171,7 @@ void silk_CNG(
             /* Update states */
             CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
 
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
         }
         silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
diff --git a/media/libopus/silk/LPC_analysis_filter.c b/media/libopus/silk/LPC_analysis_filter.c
index 9d1f16cb7d..20906673ff 100644
--- a/media/libopus/silk/LPC_analysis_filter.c
+++ b/media/libopus/silk/LPC_analysis_filter.c
@@ -44,7 +44,8 @@ void silk_LPC_analysis_filter(
     const opus_int16            *in,                /* I    Input signal                                                */
     const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
     const opus_int32            len,                /* I    Signal length                                               */
-    const opus_int32            d                   /* I    Filter order                                                */
+    const opus_int32            d,                  /* I    Filter order                                                */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   j;
@@ -69,11 +70,12 @@ void silk_LPC_analysis_filter(
     for (j=0;j<d;j++) {
         mem[ j ] = in[ d - j - 1 ];
     }
-    celt_fir( in + d, num, out + d, len - d, d, mem );
+    celt_fir( in + d, num, out + d, len - d, d, mem, arch );
     for ( j = 0; j < d; j++ ) {
         out[ j ] = 0;
     }
 #else
+    (void)arch;
     for( ix = d; ix < len; ix++ ) {
         in_ptr = &in[ ix - 1 ];
 
diff --git a/media/libopus/silk/NLSF_del_dec_quant.c b/media/libopus/silk/NLSF_del_dec_quant.c
index 504dbbd040..c3b9efccfa 100644
--- a/media/libopus/silk/NLSF_del_dec_quant.c
+++ b/media/libopus/silk/NLSF_del_dec_quant.c
@@ -56,6 +56,28 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
     opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
     const opus_uint8 *rates_Q5;
 
+    opus_int out0_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
+    opus_int out1_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
+
+    for (i = -NLSF_QUANT_MAX_AMPLITUDE_EXT; i <= NLSF_QUANT_MAX_AMPLITUDE_EXT-1; i++)
+    {
+        out0_Q10 = silk_LSHIFT( i, 10 );
+        out1_Q10 = silk_ADD16( out0_Q10, 1024 );
+        if( i > 0 ) {
+            out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else if( i == 0 ) {
+            out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else if( i == -1 ) {
+            out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else {
+            out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        }
+        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
+        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+    }
+
     silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
 
     nStates = 1;
@@ -73,21 +95,9 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
             ind[ j ][ i ] = (opus_int8)ind_tmp;
 
             /* compute outputs for ind_tmp and ind_tmp + 1 */
-            out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
-            out1_Q10 = silk_ADD16( out0_Q10, 1024 );
-            if( ind_tmp > 0 ) {
-                out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-            } else if( ind_tmp == 0 ) {
-                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-            } else if( ind_tmp == -1 ) {
-                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-            } else {
-                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-                out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
-            }
-            out0_Q10  = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
-            out1_Q10  = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+            out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
+            out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
+
             out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
             out1_Q10  = silk_ADD16( out1_Q10, pred_Q10 );
             prev_out_Q10[ j           ] = out0_Q10;
diff --git a/media/libopus/silk/NSQ.c b/media/libopus/silk/NSQ.c
index cf5b3fd547..a065884070 100644
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -46,6 +46,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
     const opus_int      signal_type             /* I    Signal type                     */
 );
 
+#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 static OPUS_INLINE void silk_noise_shape_quantizer(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     opus_int            signalType,             /* I    Signal type                     */
@@ -67,8 +68,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
     opus_int            predictLPCOrder         /* I    Prediction filter order         */
 );
+#endif
 
-void silk_NSQ(
+void silk_NSQ_c
+(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -141,7 +144,7 @@ void silk_NSQ(
                 silk_assert( start_idx > 0 );
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
-                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
 
                 NSQ->rewhite_flag = 1;
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
@@ -172,7 +175,11 @@ void silk_NSQ(
 /***********************************/
 /* silk_noise_shape_quantizer  */
 /***********************************/
-static OPUS_INLINE void silk_noise_shape_quantizer(
+
+#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+static OPUS_INLINE
+#endif
+void silk_noise_shape_quantizer(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     opus_int            signalType,             /* I    Signal type                     */
     const opus_int32    x_sc_Q10[],             /* I                                    */
diff --git a/media/libopus/silk/NSQ_del_dec.c b/media/libopus/silk/NSQ_del_dec.c
index 522be40663..aff560c221 100644
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -57,6 +57,9 @@ typedef struct {
 
 typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
 
+#if defined(MIPSr1_ASM)
+#include "mips/NSQ_del_dec_mipsr1.h"
+#endif
 static OPUS_INLINE void silk_nsq_del_dec_scale_states(
     const silk_encoder_state *psEncC,               /* I    Encoder State                       */
     silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
@@ -106,7 +109,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     opus_int            decisionDelay           /* I                                        */
 );
 
-void silk_NSQ_del_dec(
+void silk_NSQ_del_dec_c(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -244,7 +247,7 @@ void silk_NSQ_del_dec(
                 silk_assert( start_idx > 0 );
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
-                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
 
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
                 NSQ->rewhite_flag = 1;
@@ -303,6 +306,7 @@ void silk_NSQ_del_dec(
 /******************************************/
 /* Noise shape quantizer for one subframe */
 /******************************************/
+#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec
 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
     NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
@@ -629,6 +633,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     }
     RESTORE_STACK;
 }
+#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */
 
 static OPUS_INLINE void silk_nsq_del_dec_scale_states(
     const silk_encoder_state *psEncC,               /* I    Encoder State                       */
diff --git a/media/libopus/silk/PLC.c b/media/libopus/silk/PLC.c
index 01f40014c4..34a94bc313 100644
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -46,7 +46,8 @@ static OPUS_INLINE void silk_PLC_update(
 static OPUS_INLINE void silk_PLC_conceal(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
-    opus_int16                          frame[]             /* O LPC residual signal    */
+    opus_int16                          frame[],            /* O LPC residual signal    */
+    int                                 arch                /* I  Run-time architecture */
 );
 
 
@@ -65,7 +66,8 @@ void silk_PLC(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
     opus_int16                          frame[],            /* I/O  signal              */
-    opus_int                            lost                /* I Loss flag              */
+    opus_int                            lost,               /* I Loss flag              */
+    int                                 arch                /* I Run-time architecture  */
 )
 {
     /* PLC control function */
@@ -78,7 +80,7 @@ void silk_PLC(
         /****************************/
         /* Generate Signal          */
         /****************************/
-        silk_PLC_conceal( psDec, psDecCtrl, frame );
+        silk_PLC_conceal( psDec, psDecCtrl, frame, arch );
 
         psDec->lossCnt++;
     } else {
@@ -165,10 +167,35 @@ static OPUS_INLINE void silk_PLC_update(
     psPLC->nb_subfr = psDec->nb_subfr;
 }
 
+static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2,
+      const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr)
+{
+    int i, k;
+    VARDECL( opus_int16, exc_buf );
+    opus_int16 *exc_buf_ptr;
+    SAVE_STACK;
+    ALLOC( exc_buf, 2*subfr_length, opus_int16 );
+    /* Find random noise component */
+    /* Scale previous excitation signal */
+    exc_buf_ptr = exc_buf;
+    for( k = 0; k < 2; k++ ) {
+        for( i = 0; i < subfr_length; i++ ) {
+            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+                silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+        }
+        exc_buf_ptr += subfr_length;
+    }
+    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
+    silk_sum_sqr_shift( energy1, shift1, exc_buf,                  subfr_length );
+    silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length );
+    RESTORE_STACK;
+}
+
 static OPUS_INLINE void silk_PLC_conceal(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
-    opus_int16                          frame[]             /* O LPC residual signal    */
+    opus_int16                          frame[],            /* O LPC residual signal    */
+    int                                 arch                /* I Run-time architecture  */
 )
 {
     opus_int   i, j, k;
@@ -177,19 +204,26 @@ static OPUS_INLINE void silk_PLC_conceal(
     opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
     opus_int32 LPC_pred_Q10, LTP_pred_Q12;
     opus_int16 rand_scale_Q14;
-    opus_int16 *B_Q14, *exc_buf_ptr;
+    opus_int16 *B_Q14;
     opus_int32 *sLPC_Q14_ptr;
-    VARDECL( opus_int16, exc_buf );
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
+#ifdef SMALL_FOOTPRINT
+    opus_int16 *sLTP;
+#else
     VARDECL( opus_int16, sLTP );
+#endif
     VARDECL( opus_int32, sLTP_Q14 );
     silk_PLC_struct *psPLC = &psDec->sPLC;
     opus_int32 prevGain_Q10[2];
     SAVE_STACK;
 
-    ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
-    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
     ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+#ifdef SMALL_FOOTPRINT
+    /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */
+    sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length;
+#else
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+#endif
 
     prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
     prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@@ -198,19 +232,7 @@ static OPUS_INLINE void silk_PLC_conceal(
        silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
     }
 
-    /* Find random noise component */
-    /* Scale previous excitation signal */
-    exc_buf_ptr = exc_buf;
-    for( k = 0; k < 2; k++ ) {
-        for( i = 0; i < psPLC->subfr_length; i++ ) {
-            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
-                silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
-        }
-        exc_buf_ptr += psPLC->subfr_length;
-    }
-    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
-    silk_sum_sqr_shift( &energy1, &shift1, exc_buf,                         psPLC->subfr_length );
-    silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+    silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr);
 
     if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
         /* First sub-frame has lowest energy */
@@ -270,7 +292,7 @@ static OPUS_INLINE void silk_PLC_conceal(
     /* Rewhiten LTP state */
     idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
     silk_assert( idx > 0 );
-    silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
+    silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch );
     /* Scale LTP state */
     inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
     inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
diff --git a/media/libopus/silk/PLC.h b/media/libopus/silk/PLC.h
index f1e2eccc69..6438f51633 100644
--- a/media/libopus/silk/PLC.h
+++ b/media/libopus/silk/PLC.h
@@ -48,7 +48,8 @@ void silk_PLC(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
     opus_int16                          frame[],            /* I/O  signal              */
-    opus_int                            lost                /* I Loss flag              */
+    opus_int                            lost,               /* I Loss flag              */
+    int                                 arch                /* I Run-time architecture  */
 );
 
 void silk_PLC_glue_frames(
diff --git a/media/libopus/silk/SigProc_FIX.h b/media/libopus/silk/SigProc_FIX.h
index 1b58057910..b63299441e 100644
--- a/media/libopus/silk/SigProc_FIX.h
+++ b/media/libopus/silk/SigProc_FIX.h
@@ -41,7 +41,11 @@ extern "C"
 #include "typedef.h"
 #include "resampler_structs.h"
 #include "macros.h"
+#include "cpu_support.h"
 
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#include "x86/SigProc_FIX_sse.h"
+#endif
 
 /********************************************************************/
 /*                    SIGNAL PROCESSING FUNCTIONS                   */
@@ -108,7 +112,8 @@ void silk_LPC_analysis_filter(
     const opus_int16            *in,                /* I    Input signal                                                */
     const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
     const opus_int32            len,                /* I    Signal length                                               */
-    const opus_int32            d                   /* I    Filter order                                                */
+    const opus_int32            d,                  /* I    Filter order                                                */
+    int                         arch                /* I    Run-time architecture                                       */
 );
 
 /* Chirp (bandwidth expand) LP AR filter */
@@ -303,7 +308,7 @@ void silk_NLSF_VQ_weights_laroia(
 );
 
 /* Compute reflection coefficients from input signal */
-void silk_burg_modified(
+void silk_burg_modified_c(
     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
@@ -335,12 +340,15 @@ void silk_scale_vector32_Q26_lshift_18(
 /********************************************************************/
 
 /*    return sum( inVec1[i] * inVec2[i] ) */
+
 opus_int32 silk_inner_prod_aligned(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
-    const opus_int              len                 /*    I vector lengths                                              */
+    const opus_int              len,                /*    I vector lengths                                              */
+    int                         arch                /*    I Run-time architecture                                       */
 );
 
+
 opus_int32 silk_inner_prod_aligned_scale(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
@@ -348,7 +356,7 @@ opus_int32 silk_inner_prod_aligned_scale(
     const opus_int              len                 /*    I vector lengths                                              */
 );
 
-opus_int64 silk_inner_prod16_aligned_64(
+opus_int64 silk_inner_prod16_aligned_64_c(
     const opus_int16            *inVec1,            /*    I input vector 1                                              */
     const opus_int16            *inVec2,            /*    I input vector 2                                              */
     const opus_int              len                 /*    I vector lengths                                              */
@@ -575,6 +583,14 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 /* the following seems faster on x86 */
 #define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
 
+#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
+    ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+
+#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
+    ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
+#endif
+
 #include "Inlines.h"
 #include "MacroCount.h"
 #include "MacroDebug.h"
@@ -587,6 +603,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 #include "arm/SigProc_FIX_armv5e.h"
 #endif
 
+#if defined(MIPSr1_ASM)
+#include "mips/sigproc_fix_mipsr1.h"
+#endif
+
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/media/libopus/silk/VAD.c b/media/libopus/silk/VAD.c
index a809098143..0a782af2f1 100644
--- a/media/libopus/silk/VAD.c
+++ b/media/libopus/silk/VAD.c
@@ -33,10 +33,12 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"
 
 /* Silk VAD noise level estimation */
+# if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 static OPUS_INLINE void silk_VAD_GetNoiseLevels(
     const opus_int32             pX[ VAD_N_BANDS ], /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 );
+#endif
 
 /**********************************/
 /* Initialization of the Silk VAD */
@@ -77,7 +79,7 @@ static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -120
 /***************************************/
 /* Get the speech activity level in Q8 */
 /***************************************/
-opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+opus_int silk_VAD_GetSA_Q8_c(                                   /* O    Return value, 0 if success                  */
     silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
     const opus_int16            pIn[]                           /* I    PCM input                                   */
 )
@@ -296,7 +298,10 @@ opus_int silk_VAD_GetSA_Q8(                                     /* O    Return v
 /**************************/
 /* Noise level estimation */
 /**************************/
-static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+# if  !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+static OPUS_INLINE
+#endif
+void silk_VAD_GetNoiseLevels(
     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 )
diff --git a/media/libopus/silk/VQ_WMat_EC.c b/media/libopus/silk/VQ_WMat_EC.c
index 13d5d34edd..7983f1db80 100644
--- a/media/libopus/silk/VQ_WMat_EC.c
+++ b/media/libopus/silk/VQ_WMat_EC.c
@@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main.h"
 
 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
-void silk_VQ_WMat_EC(
+void silk_VQ_WMat_EC_c(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
@@ -55,7 +55,7 @@ void silk_VQ_WMat_EC(
     *rate_dist_Q14 = silk_int32_MAX;
     cb_row_Q7 = cb_Q7;
     for( k = 0; k < L; k++ ) {
-	    gain_tmp_Q7 = cb_gain_Q7[k];
+        gain_tmp_Q7 = cb_gain_Q7[k];
 
         diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
         diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
@@ -66,8 +66,8 @@ void silk_VQ_WMat_EC(
         /* Weighted rate */
         sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
 
-		/* Penalty for too large gain */
-		sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+        /* Penalty for too large gain */
+        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
 
         silk_assert( sum1_Q14 >= 0 );
 
@@ -111,7 +111,7 @@ void silk_VQ_WMat_EC(
         if( sum1_Q14 < *rate_dist_Q14 ) {
             *rate_dist_Q14 = sum1_Q14;
             *ind = (opus_int8)k;
-			*gain_Q7 = gain_tmp_Q7;
+            *gain_Q7 = gain_tmp_Q7;
         }
 
         /* Go to next cbk vector */
diff --git a/media/libopus/silk/code_signs.c b/media/libopus/silk/code_signs.c
index 0419ea2626..dfd1dca9a1 100644
--- a/media/libopus/silk/code_signs.c
+++ b/media/libopus/silk/code_signs.c
@@ -74,7 +74,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -83,7 +83,7 @@ void silk_decode_signs(
 {
     opus_int         i, j, p;
     opus_uint8       icdf[ 2 ];
-    opus_int         *q_ptr;
+    opus_int16       *q_ptr;
     const opus_uint8 *icdf_ptr;
 
     icdf[ 1 ] = 0;
diff --git a/media/libopus/silk/control_SNR.c b/media/libopus/silk/control_SNR.c
index f04e69fce2..cee87eb0d8 100644
--- a/media/libopus/silk/control_SNR.c
+++ b/media/libopus/silk/control_SNR.c
@@ -70,11 +70,6 @@ opus_int silk_control_SNR(
                 break;
             }
         }
-
-        /* Reduce coding quality whenever LBRR is enabled, to free up some bits */
-        if( psEncC->LBRR_enabled ) {
-            psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) );
-        }
     }
 
     return ret;
diff --git a/media/libopus/silk/control_codec.c b/media/libopus/silk/control_codec.c
index 1f674bddb6..044eea3f2a 100644
--- a/media/libopus/silk/control_codec.c
+++ b/media/libopus/silk/control_codec.c
@@ -397,9 +397,10 @@ static OPUS_INLINE opus_int silk_setup_LBRR(
     const opus_int32            TargetRate_bps      /* I                        */
 )
 {
-    opus_int   ret = SILK_NO_ERROR;
+    opus_int   LBRR_in_previous_packet, ret = SILK_NO_ERROR;
     opus_int32 LBRR_rate_thres_bps;
 
+    LBRR_in_previous_packet = psEncC->LBRR_enabled;
     psEncC->LBRR_enabled = 0;
     if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
         if( psEncC->fs_kHz == 8 ) {
@@ -413,8 +414,13 @@ static OPUS_INLINE opus_int silk_setup_LBRR(
 
         if( TargetRate_bps > LBRR_rate_thres_bps ) {
             /* Set gain increase for coding LBRR excitation */
+            if( LBRR_in_previous_packet == 0 ) {
+                /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */
+                psEncC->LBRR_GainIncreases = 7;
+            } else {
+                psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
+            }
             psEncC->LBRR_enabled = 1;
-            psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
         }
     }
 
diff --git a/media/libopus/silk/dec_API.c b/media/libopus/silk/dec_API.c
index 4cbcf71514..b7d8ed48d8 100644
--- a/media/libopus/silk/dec_API.c
+++ b/media/libopus/silk/dec_API.c
@@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "API.h"
 #include "main.h"
 #include "stack_alloc.h"
+#include "os_support.h"
 
 /************************/
 /* Decoder Super Struct */
@@ -84,13 +85,15 @@ opus_int silk_Decode(                                   /* O    Returns error co
     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
-    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+    opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
+    int                             arch                /* I    Run-time architecture                           */
 )
 {
     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
     opus_int32 nSamplesOutDec, LBRR_symbol;
     opus_int16 *samplesOut1_tmp[ 2 ];
-    VARDECL( opus_int16, samplesOut1_tmp_storage );
+    VARDECL( opus_int16, samplesOut1_tmp_storage1 );
+    VARDECL( opus_int16, samplesOut1_tmp_storage2 );
     VARDECL( opus_int16, samplesOut2_tmp );
     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     opus_int16 *resample_out_ptr;
@@ -98,6 +101,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
     silk_decoder_state *channel_state = psDec->channel_state;
     opus_int has_side;
     opus_int stereo_to_mono;
+    int delay_stack_alloc;
     SAVE_STACK;
 
     silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@@ -196,7 +200,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
                     if( channel_state[ n ].LBRR_flags[ i ] ) {
-                        opus_int pulses[ MAX_FRAME_LENGTH ];
+                        opus_int16 pulses[ MAX_FRAME_LENGTH ];
                         opus_int condCoding;
 
                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
@@ -251,13 +255,22 @@ opus_int silk_Decode(                                   /* O    Returns error co
         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
     }
 
-    ALLOC( samplesOut1_tmp_storage,
-           decControl->nChannelsInternal*(
-               channel_state[ 0 ].frame_length + 2 ),
+    /* Check if the temp buffer fits into the output PCM buffer. If it fits,
+       we can delay allocating the temp buffer until after the SILK peak stack
+       usage. We need to use a < and not a <= because of the two extra samples. */
+    delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
+          < decControl->API_sampleRate*decControl->nChannelsAPI;
+    ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
+           : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
            opus_int16 );
-    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
-    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
-                           + channel_state[ 0 ].frame_length + 2;
+    if ( delay_stack_alloc )
+    {
+       samplesOut1_tmp[ 0 ] = samplesOut;
+       samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
+    } else {
+       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
+       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
+    }
 
     if( lostFlag == FLAG_DECODE_NORMAL ) {
         has_side = !decode_only_middle;
@@ -284,7 +297,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
             } else {
                 condCoding = CODE_CONDITIONALLY;
             }
-            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
+            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch);
         } else {
             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
         }
@@ -312,6 +325,15 @@ opus_int silk_Decode(                                   /* O    Returns error co
         resample_out_ptr = samplesOut;
     }
 
+    ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
+           ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
+           : ALLOC_NONE,
+           opus_int16 );
+    if ( delay_stack_alloc ) {
+       OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
+       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
+       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
+    }
     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
 
         /* Resample decoded signal to API_sampleRate */
diff --git a/media/libopus/silk/decode_core.c b/media/libopus/silk/decode_core.c
index a820bf11d6..b88991e349 100644
--- a/media/libopus/silk/decode_core.c
+++ b/media/libopus/silk/decode_core.c
@@ -39,7 +39,8 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ],     /* I    Pulse signal                                */
+    int                         arch                            /* I    Run-time architecture                       */
 )
 {
     opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
@@ -147,7 +148,7 @@ void silk_decode_core(
                 }
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
-                    A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
+                    A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch );
 
                 /* After rewhitening the LTP state is unscaled */
                 if( k == 0 ) {
diff --git a/media/libopus/silk/decode_frame.c b/media/libopus/silk/decode_frame.c
index abc00a3d54..a605d95ac6 100644
--- a/media/libopus/silk/decode_frame.c
+++ b/media/libopus/silk/decode_frame.c
@@ -42,18 +42,16 @@ opus_int silk_decode_frame(
     opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
     opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
     opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
-    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+    opus_int                    condCoding,                     /* I    The type of conditional coding to use       */
+    int                         arch                            /* I    Run-time architecture                       */
 )
 {
     VARDECL( silk_decoder_control, psDecCtrl );
     opus_int         L, mv_len, ret = 0;
-    VARDECL( opus_int, pulses );
     SAVE_STACK;
 
     L = psDec->frame_length;
     ALLOC( psDecCtrl, 1, silk_decoder_control );
-    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
-                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
     psDecCtrl->LTP_scale_Q14 = 0;
 
     /* Safety checks */
@@ -62,6 +60,9 @@ opus_int silk_decode_frame(
     if(   lostFlag == FLAG_DECODE_NORMAL ||
         ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
     {
+        VARDECL( opus_int16, pulses );
+        ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+                       ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
         /*********************************************/
         /* Decode quantization indices of side info  */
         /*********************************************/
@@ -81,12 +82,12 @@ opus_int silk_decode_frame(
         /********************************************************/
         /* Run inverse NSQ                                      */
         /********************************************************/
-        silk_decode_core( psDec, psDecCtrl, pOut, pulses );
+        silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
 
         /********************************************************/
         /* Update PLC state                                     */
         /********************************************************/
-        silk_PLC( psDec, psDecCtrl, pOut, 0 );
+        silk_PLC( psDec, psDecCtrl, pOut, 0, arch );
 
         psDec->lossCnt = 0;
         psDec->prevSignalType = psDec->indices.signalType;
@@ -96,7 +97,7 @@ opus_int silk_decode_frame(
         psDec->first_frame_after_reset = 0;
     } else {
         /* Handle packet loss by extrapolation */
-        silk_PLC( psDec, psDecCtrl, pOut, 1 );
+        silk_PLC( psDec, psDecCtrl, pOut, 1, arch );
     }
 
     /*************************/
@@ -107,16 +108,16 @@ opus_int silk_decode_frame(
     silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
     silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
 
-    /****************************************************************/
-    /* Ensure smooth connection of extrapolated and good frames     */
-    /****************************************************************/
-    silk_PLC_glue_frames( psDec, pOut, L );
-
     /************************************************/
     /* Comfort noise generation / estimation        */
     /************************************************/
     silk_CNG( psDec, psDecCtrl, pOut, L );
 
+    /****************************************************************/
+    /* Ensure smooth connection of extrapolated and good frames     */
+    /****************************************************************/
+    silk_PLC_glue_frames( psDec, pOut, L );
+
     /* Update some decoder state variables */
     psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
 
diff --git a/media/libopus/silk/decode_pulses.c b/media/libopus/silk/decode_pulses.c
index e8a87c2ab7..d6bbec9225 100644
--- a/media/libopus/silk/decode_pulses.c
+++ b/media/libopus/silk/decode_pulses.c
@@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /*********************************************/
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
@@ -44,7 +44,7 @@ void silk_decode_pulses(
 {
     opus_int   i, j, k, iter, abs_q, nLS, RateLevelIndex;
     opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
-    opus_int   *pulses_ptr;
+    opus_int16 *pulses_ptr;
     const opus_uint8 *cdf_ptr;
 
     /*********************/
@@ -69,9 +69,9 @@ void silk_decode_pulses(
         sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 );
 
         /* LSB indication */
-        while( sum_pulses[ i ] == MAX_PULSES + 1 ) {
+        while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) {
             nLshifts[ i ]++;
-            /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */
+            /* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */
             sum_pulses[ i ] = ec_dec_icdf( psRangeDec,
                     silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 );
         }
@@ -84,7 +84,7 @@ void silk_decode_pulses(
         if( sum_pulses[ i ] > 0 ) {
             silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
         } else {
-            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) );
         }
     }
 
diff --git a/media/libopus/silk/define.h b/media/libopus/silk/define.h
index c47aca9f58..19c9b00e25 100644
--- a/media/libopus/silk/define.h
+++ b/media/libopus/silk/define.h
@@ -169,7 +169,7 @@ extern "C"
 #define N_RATE_LEVELS                           10
 
 /* Maximum sum of pulses per shell coding frame */
-#define MAX_PULSES                              16
+#define SILK_MAX_PULSES                         16
 
 #define MAX_MATRIX_SIZE                         MAX_LPC_ORDER /* Max of LPC Order and LTP order */
 
diff --git a/media/libopus/silk/enc_API.c b/media/libopus/silk/enc_API.c
index 43739efc24..f8060286db 100644
--- a/media/libopus/silk/enc_API.c
+++ b/media/libopus/silk/enc_API.c
@@ -165,7 +165,7 @@ opus_int silk_Encode(                                   /* O    Returns error co
     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
 
     /* Check values in encoder control structure */
-    if( ( ret = check_control_input( encControl ) != 0 ) ) {
+    if( ( ret = check_control_input( encControl ) ) != 0 ) {
         silk_assert( 0 );
         RESTORE_STACK;
         return ret;
@@ -376,26 +376,33 @@ opus_int silk_Encode(                                   /* O    Returns error co
                 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                     silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
                 }
+
+                psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc );
             }
 
             silk_HP_variable_cutoff( psEnc->state_Fxx );
 
             /* Total target bits for packet */
             nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
-            /* Subtract half of the bits already used */
+            /* Subtract bits used for LBRR */
             if( !prefillFlag ) {
-                nBits -= ec_tell( psRangeEnc ) >> 1;
+                nBits -= psEnc->nBitsUsedLBRR;
             }
             /* Divide by number of uncoded frames left in packet */
-            nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded );
+            nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
             /* Convert to bits/second */
             if( encControl->payloadSize_ms == 10 ) {
                 TargetRate_bps = silk_SMULBB( nBits, 100 );
             } else {
                 TargetRate_bps = silk_SMULBB( nBits, 50 );
             }
-            /* Subtract fraction of bits in excess of target in previous packets */
+            /* Subtract fraction of bits in excess of target in previous frames and packets */
             TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
+            if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
+                /* Compare actual vs target bits so far in this packet */
+                opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+                TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
+            }
             /* Never exceed input bitrate */
             TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
 
diff --git a/media/libopus/silk/encode_pulses.c b/media/libopus/silk/encode_pulses.c
index a4501438d1..ab00264f99 100644
--- a/media/libopus/silk/encode_pulses.c
+++ b/media/libopus/silk/encode_pulses.c
@@ -142,7 +142,7 @@ void silk_encode_pulses(
         sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ];
         for( i = 0; i < iter; i++ ) {
             if( nRshifts[ i ] > 0 ) {
-                sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ];
+                sumBits_Q5 += nBits_ptr[ SILK_MAX_PULSES + 1 ];
             } else {
                 sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ];
             }
@@ -162,9 +162,9 @@ void silk_encode_pulses(
         if( nRshifts[ i ] == 0 ) {
             ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 );
         } else {
-            ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 );
+            ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, cdf_ptr, 8 );
             for( k = 0; k < nRshifts[ i ] - 1; k++ ) {
-                ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+                ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
             }
             ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
         }
diff --git a/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c b/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c
index a94190808e..5574e7069f 100644
--- a/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c
+++ b/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -45,7 +45,7 @@ void silk_LTP_analysis_filter_FIX(
     const opus_int16 *x_ptr, *x_lag_ptr;
     opus_int16   Btmp_Q14[ LTP_ORDER ];
     opus_int16   *LTP_res_ptr;
-    opus_int     k, i, j;
+    opus_int     k, i;
     opus_int32   LTP_est;
 
     x_ptr = x;
@@ -53,9 +53,12 @@ void silk_LTP_analysis_filter_FIX(
     for( k = 0; k < nb_subfr; k++ ) {
 
         x_lag_ptr = x_ptr - pitchL[ k ];
-        for( i = 0; i < LTP_ORDER; i++ ) {
-            Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
-        }
+
+        Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ];
+        Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ];
+        Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ];
+        Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ];
+        Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ];
 
         /* LTP analysis FIR filter */
         for( i = 0; i < subfr_length + pre_length; i++ ) {
@@ -63,9 +66,11 @@ void silk_LTP_analysis_filter_FIX(
 
             /* Long-term prediction */
             LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
-            for( j = 1; j < LTP_ORDER; j++ ) {
-                LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
-            }
+            LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] );
+            LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] );
+            LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] );
+            LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] );
+
             LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
 
             /* Subtract long-term prediction */
diff --git a/media/libopus/silk/fixed/burg_modified_FIX.c b/media/libopus/silk/fixed/burg_modified_FIX.c
index db348295bf..4878553b65 100644
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define MAX_RSHIFTS                 (32 - QA)
 
 /* Compute reflection coefficients from input signal */
-void silk_burg_modified(
+void silk_burg_modified_c(
     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
@@ -54,7 +54,7 @@ void silk_burg_modified(
     int                         arch                /* I    Run-time architecture                                       */
 )
 {
-    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int         k, n, s, lz, rshifts, reached_max_gain;
     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
     const opus_int16 *x_ptr;
     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
@@ -63,27 +63,23 @@ void silk_burg_modified(
     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+    opus_int64       C0_64;
 
     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
 
     /* Compute autocorrelations, added over subframes */
-    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
-    if( rshifts > MAX_RSHIFTS ) {
-        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
-        silk_assert( C0 > 0 );
-        rshifts = MAX_RSHIFTS;
+    C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
+    lz = silk_CLZ64(C0_64);
+    rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
+    if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
+    if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
+
+    if (rshifts > 0) {
+        C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );
     } else {
-        lz = silk_CLZ32( C0 ) - 1;
-        rshifts_extra = N_BITS_HEAD_ROOM - lz;
-        if( rshifts_extra > 0 ) {
-            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
-            C0 = silk_RSHIFT32( C0, rshifts_extra );
-        } else {
-            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
-            C0 = silk_LSHIFT32( C0, -rshifts_extra );
-        }
-        rshifts += rshifts_extra;
+        C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
     }
+
     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
     if( rshifts > 0 ) {
@@ -91,7 +87,7 @@ void silk_burg_modified(
             x_ptr = x + s * subfr_length;
             for( n = 1; n < D + 1; n++ ) {
                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
-                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
             }
         }
     } else {
@@ -252,12 +248,12 @@ void silk_burg_modified(
         if( rshifts > 0 ) {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
-                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
             }
         } else {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
-                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
             }
         }
         /* Approximate residual energy */
diff --git a/media/libopus/silk/fixed/corrMatrix_FIX.c b/media/libopus/silk/fixed/corrMatrix_FIX.c
index c617270536..c1d437c785 100644
--- a/media/libopus/silk/fixed/corrMatrix_FIX.c
+++ b/media/libopus/silk/fixed/corrMatrix_FIX.c
@@ -42,7 +42,8 @@ void silk_corrVector_FIX(
     const opus_int                  L,                                      /* I    Length of vectors                                                           */
     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
     opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
-    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+    const opus_int                  rshifts,                                /* I    Right shifts of correlations                                                */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 )
 {
     opus_int         lag, i;
@@ -65,7 +66,7 @@ void silk_corrVector_FIX(
     } else {
         silk_assert( rshifts == 0 );
         for( lag = 0; lag < order; lag++ ) {
-            Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
+            Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */
             ptr1--; /* Go to next column of X */
         }
     }
@@ -78,7 +79,8 @@ void silk_corrMatrix_FIX(
     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
     const opus_int                  head_room,                              /* I    Desired headroom                                                            */
     opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
-    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+    opus_int                        *rshifts,                               /* I/O  Right shifts of correlations                                                */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 )
 {
     opus_int         i, j, lag, rshifts_local, head_room_rshifts;
@@ -138,7 +140,7 @@ void silk_corrMatrix_FIX(
     } else {
         for( lag = 1; lag < order; lag++ ) {
             /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
-            energy = silk_inner_prod_aligned( ptr1, ptr2, L );
+            energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch );
             matrix_ptr( XX, lag, 0, order ) = energy;
             matrix_ptr( XX, 0, lag, order ) = energy;
             /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
diff --git a/media/libopus/silk/fixed/encode_frame_FIX.c b/media/libopus/silk/fixed/encode_frame_FIX.c
index b490986b97..5ef44b03fc 100644
--- a/media/libopus/silk/fixed/encode_frame_FIX.c
+++ b/media/libopus/silk/fixed/encode_frame_FIX.c
@@ -48,7 +48,7 @@ void silk_encode_do_VAD_FIX(
     /****************************/
     /* Voice Activity Detection */
     /****************************/
-    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
 
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
@@ -196,11 +196,13 @@ opus_int silk_encode_frame_FIX(
                 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
                     silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
                            sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
-                           sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                           sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
+                           psEnc->sCmn.arch );
                 } else {
                     silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
                             sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
-                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
+                            psEnc->sCmn.arch);
                 }
 
                 /****************************************/
@@ -287,7 +289,7 @@ opus_int silk_encode_frame_FIX(
             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
                 sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
             }
- 
+
             /* Quantize gains */
             psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
             silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
@@ -371,12 +373,12 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
             silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
                 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
                 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
-                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
         } else {
             silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
                 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
                 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
-                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
         }
 
         /* Restore original gains */
diff --git a/media/libopus/silk/fixed/find_LPC_FIX.c b/media/libopus/silk/fixed/find_LPC_FIX.c
index 783d32e20f..e11cdc86e6 100644
--- a/media/libopus/silk/fixed/find_LPC_FIX.c
+++ b/media/libopus/silk/fixed/find_LPC_FIX.c
@@ -95,7 +95,7 @@ void silk_find_LPC_FIX(
             silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
 
             /* Calculate residual energy with NLSF interpolation */
-            silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
+            silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch );
 
             silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder,                subfr_length - psEncC->predictLPCOrder );
             silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );
diff --git a/media/libopus/silk/fixed/find_LTP_FIX.c b/media/libopus/silk/fixed/find_LTP_FIX.c
index 8c4d70376c..1314a28137 100644
--- a/media/libopus/silk/fixed/find_LTP_FIX.c
+++ b/media/libopus/silk/fixed/find_LTP_FIX.c
@@ -50,7 +50,8 @@ void silk_find_LTP_FIX(
     const opus_int                  subfr_length,                           /* I    subframe length                                                             */
     const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
     const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
-    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ],           /* O    right shifts applied to correlations                                        */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 )
 {
     opus_int   i, k, lshift;
@@ -84,10 +85,10 @@ void silk_find_LTP_FIX(
             rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
         }
         corr_rshifts[ k ] = rr_shifts;
-        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] );  /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
+        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch );  /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
 
         /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
-        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] );  /* Rr_fix_ptr   in Q( -corr_rshifts[ k ] ) */
+        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch );  /* Rr_fix_ptr   in Q( -corr_rshifts[ k ] ) */
         if( corr_rshifts[ k ] > rr_shifts ) {
             rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
         }
diff --git a/media/libopus/silk/fixed/find_pitch_lags_FIX.c b/media/libopus/silk/fixed/find_pitch_lags_FIX.c
index 620f8dcd2c..b8440a8247 100644
--- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c
+++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c
@@ -112,7 +112,7 @@ void silk_find_pitch_lags_FIX(
     /*****************************************/
     /* LPC analysis filtering                */
     /*****************************************/
-    silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+    silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch );
 
     if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
         /* Threshold for pitch estimator */
diff --git a/media/libopus/silk/fixed/find_pred_coefs_FIX.c b/media/libopus/silk/fixed/find_pred_coefs_FIX.c
index 5c22f8288b..d308e9cf5f 100644
--- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c
+++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c
@@ -89,11 +89,12 @@ void silk_find_pred_coefs_FIX(
         /* LTP analysis */
         silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
             res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
-            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
+            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch );
 
         /* Quantize LTP gain parameters */
         silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr,
+            psEnc->sCmn.arch);
 
         /* Control LTP scaling */
         silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
@@ -118,16 +119,16 @@ void silk_find_pred_coefs_FIX(
 
         silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
         psEncCtrl->LTPredCodGain_Q7 = 0;
-		psEnc->sCmn.sum_log_gain_Q7 = 0;
+        psEnc->sCmn.sum_log_gain_Q7 = 0;
     }
 
     /* Limit on total predictive coding gain */
     if( psEnc->sCmn.first_frame_after_reset ) {
         minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
-    } else {        
+    } else {
         minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) );      /* Q16 */
-        minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, 
-            silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), 
+        minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
+            silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
                 silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
     }
 
@@ -139,7 +140,7 @@ void silk_find_pred_coefs_FIX(
 
     /* Calculate residual energy using quantized LPC coefficients */
     silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
-        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch );
 
     /* Copy to prediction struct for use in next frame for interpolation */
     silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
diff --git a/media/libopus/silk/fixed/main_FIX.h b/media/libopus/silk/fixed/main_FIX.h
index a56ca07a22..375b5eb32e 100644
--- a/media/libopus/silk/fixed/main_FIX.h
+++ b/media/libopus/silk/fixed/main_FIX.h
@@ -97,6 +97,17 @@ void silk_prefilter_FIX(
     const opus_int16                x[]                                     /* I    Speech signal                                                               */
 );
 
+void silk_warped_LPC_analysis_filter_FIX_c(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+);
+
+
 /**************************/
 /* Noise shaping analysis */
 /**************************/
@@ -166,7 +177,8 @@ void silk_find_LTP_FIX(
     const opus_int                  subfr_length,                           /* I    subframe length                                                             */
     const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
     const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
-    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ],           /* O    right shifts applied to correlations                                        */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 );
 
 void silk_LTP_analysis_filter_FIX(
@@ -190,7 +202,8 @@ void silk_residual_energy_FIX(
     const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
     const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
     const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
-    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+    const opus_int                  LPC_order,                              /* I    LPC order                                                                   */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 );
 
 /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
@@ -220,7 +233,8 @@ void silk_corrMatrix_FIX(
     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
     const opus_int                  head_room,                              /* I    Desired headroom                                                            */
     opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
-    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+    opus_int                        *rshifts,                               /* I/O  Right shifts of correlations                                                */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 );
 
 /* Calculates correlation vector X'*t */
@@ -230,7 +244,8 @@ void silk_corrVector_FIX(
     const opus_int                  L,                                      /* I    Length of vectors                                                           */
     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
     opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
-    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+    const opus_int                  rshifts,                                /* I    Right shifts of correlations                                                */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 );
 
 /* Add noise to matrix diagonal */
diff --git a/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
new file mode 100644
index 0000000000..c30481e437
--- /dev/null
+++ b/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
@@ -0,0 +1,336 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************/
+#define OVERRIDE_silk_noise_shape_analysis_FIX
+
+void silk_noise_shape_analysis_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
+    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int     k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+    opus_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+    opus_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+    opus_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+    opus_int32   auto_corr[     MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int32   refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR1_Q24[       MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR2_Q24[       MAX_SHAPE_LPC_ORDER ];
+    VARDECL( opus_int16, x_windowed );
+    const opus_int16 *x_ptr, *pitch_res_ptr;
+    SAVE_STACK;
+
+    /* Point to start of first LPC analysis block */
+    x_ptr = x - psEnc->sCmn.la_shape;
+
+    /****************/
+    /* GAIN CONTROL */
+    /****************/
+    SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+    /* Input quality is the average of the quality in the lowest two VAD bands */
+    psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+        + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+    psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+        SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+    /* Reduce coding SNR during low speech activity */
+    if( psEnc->sCmn.useCBR == 0 ) {
+        b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+        b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ),                                       /* Q11*/
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) );     /* Q12*/
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce gains for periodic signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+    } else {
+        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+            SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+    }
+
+    /*************************/
+    /* SPARSENESS PROCESSING */
+    /*************************/
+    /* Set quantizer offset */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Initially set to 0; may be overruled in process_gains(..) */
+        psEnc->sCmn.indices.quantOffsetType = 0;
+        psEncCtrl->sparseness_Q8 = 0;
+    } else {
+        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+        nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
+        energy_variation_Q7 = 0;
+        log_energy_prev_Q7  = 0;
+        pitch_res_ptr = pitch_res;
+        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+            silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+            nrg += silk_RSHIFT( nSamples, scale );           /* Q(-scale)*/
+
+            log_energy_Q7 = silk_lin2log( nrg );
+            if( k > 0 ) {
+                energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+            }
+            log_energy_prev_Q7 = log_energy_Q7;
+            pitch_res_ptr += nSamples;
+        }
+
+        psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+            SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+        /* Set quantization offset depending on sparseness measure */
+        if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+
+        /* Increase coding SNR for sparse signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+    }
+
+    /*******************************/
+    /* Control bandwidth expansion */
+    /*******************************/
+    /* More BWE for signals with high prediction gain */
+    strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+    BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+        silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+    delta_Q16  = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+        SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+    BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+    BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+    /* BWExp1 will be applied after BWExp2, so make it relative */
+    BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+    if( psEnc->sCmn.warping_Q16 > 0 ) {
+        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+        warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+    } else {
+        warping_Q16 = 0;
+    }
+
+    /********************************************/
+    /* Compute noise shaping AR coefs and gains */
+    /********************************************/
+    ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Apply window: sine slope followed by flat part followed by cosine slope */
+        opus_int shift, slope_part, flat_part;
+        flat_part = psEnc->sCmn.fs_kHz * 3;
+        slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
+
+        silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+        shift = slope_part;
+        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
+        shift += flat_part;
+        silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+        /* Update pointer: next LPC analysis block */
+        x_ptr += psEnc->sCmn.subfr_length;
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Calculate warped auto correlation */
+            silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+        } else {
+            /* Calculate regular auto correlation */
+            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+        }
+
+        /* Add white noise, as a fraction of energy */
+        auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+            SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+        /* Calculate the reflection coefficients using schur */
+        nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+        silk_assert( nrg >= 0 );
+
+        /* Convert reflection coefficients to prediction coefficients */
+        silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+        Qnrg = -scale;          /* range: -12...30*/
+        silk_assert( Qnrg >= -12 );
+        silk_assert( Qnrg <=  30 );
+
+        /* Make sure that Qnrg is an even number */
+        if( Qnrg & 1 ) {
+            Qnrg -= 1;
+            nrg >>= 1;
+        }
+
+        tmp32 = silk_SQRT_APPROX( nrg );
+        Qnrg >>= 1;             /* range: -6...15*/
+
+        psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \
+                            silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) ));
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Adjust gain for warping */
+            gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+            silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+            if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+               psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+            } else {
+               psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+            }
+        }
+
+        /* Bandwidth expansion for synthesis filter shaping */
+        silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+        /* Compute noise shaping filter coefficients */
+        silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
+
+        /* Bandwidth expansion for analysis filter shaping */
+        silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+        silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+        /* Ratio of prediction gains, in energy domain */
+        pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+        nrg         = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+        /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+        pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+        psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+        /* Convert to monic warped prediction coefficients and limit absolute values */
+        limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+        /* Convert from Q24 to Q13 and store in int16 */
+        for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
+            psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+            psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+        }
+    }
+
+    /*****************/
+    /* Gain tweaking */
+    /*****************/
+    /* Increase gains during low speech activity and put lower limit on gains */
+    gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+    gain_add_Q16  = silk_log2lin(  silk_SMLAWB(  SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+    silk_assert( gain_mult_Q16 > 0 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+        silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+        psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+    }
+
+    gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+        psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+    }
+
+    /************************************************/
+    /* Control low-frequency shaping and noise tilt */
+    /************************************************/
+    /* Less low frequency shaping for noisy inputs */
+    strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+        SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+    strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+        opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+            /* Pack two coefficients in one int32 */
+            psEncCtrl->LF_shp_Q14[ k ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
+            psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        }
+        silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+        Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+                silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+    } else {
+        b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+        /* Pack two coefficients in one int32 */
+        psEncCtrl->LF_shp_Q14[ 0 ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+            silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+        psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+        }
+        Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+    }
+
+    /****************************/
+    /* HARMONIC SHAPING CONTROL */
+    /****************************/
+    /* Control boosting of harmonic frequencies */
+    HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+        psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+    /* More harmonic boost for noisy input signals */
+    HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+        SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* More harmonic noise shaping for high bitrates or noisy input */
+        HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+                SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+                psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+        /* Less harmonic noise shaping for less periodic signals */
+        HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+            silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+    } else {
+        HarmShapeGain_Q16 = 0;
+    }
+
+    /*************************/
+    /* Smooth over subframes */
+    /*************************/
+    for( k = 0; k < MAX_NB_SUBFR; k++ ) {
+        psShapeSt->HarmBoost_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16,     HarmBoost_Q16     - psShapeSt->HarmBoost_smth_Q16,     SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->HarmShapeGain_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->Tilt_smth_Q16 =
+            silk_SMLAWB( psShapeSt->Tilt_smth_Q16,          Tilt_Q16          - psShapeSt->Tilt_smth_Q16,          SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+        psEncCtrl->HarmBoost_Q14[ k ]     = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16,     2 );
+        psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+        psEncCtrl->Tilt_Q14[ k ]          = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16,          2 );
+    }
+    RESTORE_STACK;
+}
diff --git a/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h
new file mode 100644
index 0000000000..21b256885f
--- /dev/null
+++ b/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h
@@ -0,0 +1,184 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef __PREFILTER_FIX_MIPSR1_H__
+#define __PREFILTER_FIX_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
+void silk_warped_LPC_analysis_filter_FIX(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order,                      /* I    Filter order (even)                 */
+               int              arch
+)
+{
+    opus_int     n, i;
+    opus_int32   acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4;
+    opus_int32   state_cur, state_next;
+
+    (void)arch;
+
+    /* Order must be even */
+    /* Length must be even */
+
+    silk_assert( ( order & 1 ) == 0 );
+    silk_assert( ( length & 1 ) == 0 );
+
+    for( n = 0; n < length; n+=2 ) {
+        /* Output of lowpass section */
+        tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+        state_cur = silk_LSHIFT( input[ n ], 14 );
+        /* Output of allpass section */
+        tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+        state_next = tmp2;
+        acc_Q11 = silk_RSHIFT( order, 1 );
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+
+
+        /* Output of lowpass section */
+        tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 );
+        state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 );
+        /* Output of allpass section */
+        tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+        state[ 1 ] = tmp4;
+        acc_Q22 = silk_RSHIFT( order, 1 );
+        acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] );
+
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+            state_cur = tmp1;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+            state_next = tmp2;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+
+
+            /* Output of allpass section */
+            tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 );
+            state[ i ] = tmp3;
+            acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+            state[ i + 1 ] = tmp4;
+            acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] );
+        }
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+        res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+
+        state[ order ] = tmp3;
+        acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] );
+        res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 );
+    }
+}
+
+
+
+/* Prefilter for finding Quantizer input signal */
+#define OVERRIDE_silk_prefilt_FIX
+static inline void silk_prefilt_FIX(
+    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
+    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
+    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
+    opus_int                    lag,                        /* I    Lag for harmonic shaping            */
+    opus_int                    length                      /* I    Length of signals                   */
+)
+{
+    opus_int   i, idx, LTP_shp_buf_idx;
+    opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+    opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+    opus_int16 *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf     = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp_Q12  = P->sLF_AR_shp_Q12;
+    sLF_MA_shp_Q12  = P->sLF_MA_shp_Q12;
+
+    if( lag > 0 ) {
+        for( i = 0; i < length; i++ ) {
+            /* unrolled loop */
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP_Q12 = silk_SMULBB(            LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+
+            n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+            n_LF_Q10   = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+            sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+            sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12,  silk_LSHIFT( n_LF_Q10,   2 ) );
+
+            LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+            LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+            xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+        }
+    }
+    else
+    {
+        for( i = 0; i < length; i++ ) {
+
+            n_LTP_Q12 = 0;
+
+            n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+            n_LF_Q10   = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+            sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+            sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12,  silk_LSHIFT( n_LF_Q10,   2 ) );
+
+            LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+            LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+            xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 );
+        }
+    }
+
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp_Q12   = sLF_AR_shp_Q12;
+    P->sLF_MA_shp_Q12   = sLF_MA_shp_Q12;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
+
+#endif /* __PREFILTER_FIX_MIPSR1_H__ */
diff --git a/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
new file mode 100644
index 0000000000..e803ef0fce
--- /dev/null
+++ b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
@@ -0,0 +1,165 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+#undef QC
+#define QC  10
+
+#undef QS
+#define QS  14
+
+/* Autocorrelations for a warped frequency axis */
+#define OVERRIDE_silk_warped_autocorrelation_FIX
+void silk_warped_autocorrelation_FIX(
+          opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
+          opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
+    const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
+    const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
+    const opus_int                  length,                                 /* I    Length of input                                                             */
+    const opus_int                  order                                   /* I    Correlation order (even)                                                    */
+)
+{
+    opus_int   n, i, lsh;
+    opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0;
+    opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+    opus_int64 corr_QC[  MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+    opus_int64 temp64;
+
+    opus_int32 val;
+    val = 2 * QS - QC;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+    silk_assert( 2 * QS - QC >= 0 );
+
+    /* Loop over samples */
+    for( n = 0; n < length; n=n+4 ) {
+
+        tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+        start_1 = tmp1_QS;
+        tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS );
+        start_2 = tmp3_QS;
+        tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS );
+        start_3 = tmp5_QS;
+        tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS );
+
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+            corr_QC[  i ] = __builtin_mips_madd( corr_QC[  i ], tmp1_QS,  start_1);
+
+            tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 );
+            corr_QC[  i ] = __builtin_mips_madd( corr_QC[  i ], tmp3_QS,  start_2);
+
+            tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 );
+            corr_QC[  i ] = __builtin_mips_madd( corr_QC[  i ], tmp5_QS,  start_3);
+
+            tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 );
+            state_QS[ i ]  = tmp7_QS;
+            corr_QC[  i ] = __builtin_mips_madd( corr_QC[  i ], tmp7_QS, state_QS[0]);
+
+            /* Output of allpass section */
+            tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+            corr_QC[  i+1 ] = __builtin_mips_madd( corr_QC[  i+1 ], tmp2_QS,  start_1);
+
+            tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 );
+            corr_QC[  i+1 ] = __builtin_mips_madd( corr_QC[  i+1 ], tmp4_QS,  start_2);
+
+            tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 );
+            corr_QC[  i+1 ] = __builtin_mips_madd( corr_QC[  i+1 ], tmp6_QS,  start_3);
+
+            tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 );
+            state_QS[ i + 1 ]  = tmp8_QS;
+            corr_QC[  i+1 ] = __builtin_mips_madd( corr_QC[  i+1 ], tmp8_QS,  state_QS[ 0 ]);
+
+        }
+        state_QS[ order ] = tmp7_QS;
+
+        corr_QC[  order ] = __builtin_mips_madd( corr_QC[  order ], tmp1_QS,  start_1);
+        corr_QC[  order ] = __builtin_mips_madd( corr_QC[  order ], tmp3_QS,  start_2);
+        corr_QC[  order ] = __builtin_mips_madd( corr_QC[  order ], tmp5_QS,  start_3);
+        corr_QC[  order ] = __builtin_mips_madd( corr_QC[  order ], tmp7_QS,  state_QS[ 0 ]);
+    }
+
+    for(;n< length; n++ ) {
+
+        tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+
+            /* Output of allpass section */
+            tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+            state_QS[ i ] = tmp1_QS;
+            corr_QC[  i ] = __builtin_mips_madd( corr_QC[  i ], tmp1_QS,   state_QS[ 0 ]);
+
+            /* Output of allpass section */
+            tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+            state_QS[ i + 1 ]  = tmp2_QS;
+            corr_QC[  i+1 ] = __builtin_mips_madd( corr_QC[  i+1 ], tmp2_QS,   state_QS[ 0 ]);
+        }
+        state_QS[ order ] = tmp1_QS;
+        corr_QC[  order ] = __builtin_mips_madd( corr_QC[  order ], tmp1_QS,   state_QS[ 0 ]);
+    }
+
+    temp64 =  corr_QC[ 0 ];
+    temp64 = __builtin_mips_shilo(temp64, val);
+
+    lsh = silk_CLZ64( temp64 ) - 35;
+    lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+    *scale = -( QC + lsh );
+    silk_assert( *scale >= -30 && *scale <= 12 );
+    if( lsh >= 0 ) {
+        for( i = 0; i < order + 1; i++ ) {
+            temp64 = corr_QC[ i ];
+            //temp64 = __builtin_mips_shilo(temp64, val);
+            temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+        }
+    } else {
+        for( i = 0; i < order + 1; i++ ) {
+            temp64 = corr_QC[ i ];
+            //temp64 = __builtin_mips_shilo(temp64, val);
+            temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+        }
+    }
+
+     corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val);
+
+     silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
+#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */
diff --git a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
index e24d2e9d33..22a89f75ae 100644
--- a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
+++ b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
@@ -138,9 +138,14 @@ static OPUS_INLINE void limit_warped_coefs(
     silk_assert( 0 );
 }
 
+#if defined(MIPSr1_ASM)
+#include "mips/noise_shape_analysis_FIX_mipsr1.h"
+#endif
+
 /**************************************************************/
 /* Compute noise shaping coefficients and initial gain values */
 /**************************************************************/
+#ifndef OVERRIDE_silk_noise_shape_analysis_FIX
 void silk_noise_shape_analysis_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
@@ -443,3 +448,4 @@ void silk_noise_shape_analysis_FIX(
     }
     RESTORE_STACK;
 }
+#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */
diff --git a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
index 1641a0fbcd..01bb9fc0a8 100644
--- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
@@ -72,7 +72,8 @@ static void silk_P_Ana_calc_energy_st3(
     opus_int          start_lag,                       /* I lag offset to search around */
     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     opus_int          nb_subfr,                        /* I number of subframes         */
-    opus_int          complexity                       /* I Complexity setting          */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
 );
 
 /*************************************************************/
@@ -195,8 +196,8 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
-        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
-        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
+        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
+        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ, arch ) );
         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
 
         matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
@@ -334,7 +335,7 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
         silk_assert( target_ptr >= frame_8kHz );
         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
 
-        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
         for( j = 0; j < length_d_comp; j++ ) {
             d = d_comp[ j ];
             basis_ptr = target_ptr - d;
@@ -343,9 +344,9 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
             silk_assert( basis_ptr >= frame_8kHz );
             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
 
-            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
             if( cross_corr > 0 ) {
-                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
                     (opus_int16)silk_DIV32_varQ( cross_corr,
                                                  silk_ADD32( energy_target,
@@ -519,14 +520,14 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
         silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
-        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
 
         lag_counter = 0;
         silk_assert( lag == silk_SAT16( lag ) );
         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
 
         target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
-        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
         for( d = start_lag; d <= end_lag; d++ ) {
             for( j = 0; j < nb_cbk_search; j++ ) {
                 cross_corr = 0;
@@ -671,7 +672,8 @@ static void silk_P_Ana_calc_energy_st3(
     opus_int          start_lag,                        /* I lag offset to search around */
     opus_int          sf_length,                        /* I length of one 5 ms subframe */
     opus_int          nb_subfr,                         /* I number of subframes         */
-    opus_int          complexity                        /* I Complexity setting          */
+    opus_int          complexity,                       /* I Complexity setting          */
+    int               arch                              /* I Run-time architecture       */
 )
 {
     const opus_int16 *target_ptr, *basis_ptr;
@@ -705,7 +707,7 @@ static void silk_P_Ana_calc_energy_st3(
 
         /* Calculate the energy for first lag */
         basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
-        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
+        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
         silk_assert( energy >= 0 );
         scratch_mem[ lag_counter ] = energy;
         lag_counter++;
diff --git a/media/libopus/silk/fixed/prefilter_FIX.c b/media/libopus/silk/fixed/prefilter_FIX.c
index d381730c28..6a8e35152e 100644
--- a/media/libopus/silk/fixed/prefilter_FIX.c
+++ b/media/libopus/silk/fixed/prefilter_FIX.c
@@ -33,6 +33,16 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"
 #include "tuning_parameters.h"
 
+#if defined(MIPSr1_ASM)
+#include "mips/prefilter_FIX_mipsr1.h"
+#endif
+
+
+#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX)
+#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
+    ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
+#endif
+
 /* Prefilter for finding Quantizer input signal */
 static OPUS_INLINE void silk_prefilt_FIX(
     silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
@@ -45,7 +55,7 @@ static OPUS_INLINE void silk_prefilt_FIX(
     opus_int                    length                      /* I    Length of signals                   */
 );
 
-void silk_warped_LPC_analysis_filter_FIX(
+void silk_warped_LPC_analysis_filter_FIX_c(
           opus_int32            state[],                    /* I/O  State [order + 1]                   */
           opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
     const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
@@ -130,7 +140,7 @@ void silk_prefilter_FIX(
 
         /* Short term FIR filtering*/
         silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
-            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch );
 
         /* Reduce (mainly) low frequencies during harmonic emphasis */
         B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
@@ -155,6 +165,7 @@ void silk_prefilter_FIX(
     RESTORE_STACK;
 }
 
+#ifndef OVERRIDE_silk_prefilt_FIX
 /* Prefilter for finding Quantizer input signal */
 static OPUS_INLINE void silk_prefilt_FIX(
     silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
@@ -207,3 +218,4 @@ static OPUS_INLINE void silk_prefilt_FIX(
     P->sLF_MA_shp_Q12   = sLF_MA_shp_Q12;
     P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
 }
+#endif /* OVERRIDE_silk_prefilt_FIX */
diff --git a/media/libopus/silk/fixed/residual_energy_FIX.c b/media/libopus/silk/fixed/residual_energy_FIX.c
index 105ae31807..41f74778e8 100644
--- a/media/libopus/silk/fixed/residual_energy_FIX.c
+++ b/media/libopus/silk/fixed/residual_energy_FIX.c
@@ -42,7 +42,8 @@ void silk_residual_energy_FIX(
     const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
     const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
     const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
-    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+    const opus_int                  LPC_order,                              /* I    LPC order                                                                   */
+          int                       arch                                    /* I    Run-time architecture                                                       */
 )
 {
     opus_int         offset, i, j, rshift, lz1, lz2;
@@ -60,7 +61,7 @@ void silk_residual_energy_FIX(
     silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
     for( i = 0; i < nb_subfr >> 1; i++ ) {
         /* Calculate half frame LPC residual signal including preceding samples */
-        silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+        silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch );
 
         /* Point to first subframe of the just calculated LPC residual signal */
         LPC_res_ptr = LPC_res + LPC_order;
diff --git a/media/libopus/silk/fixed/structs_FIX.h b/media/libopus/silk/fixed/structs_FIX.h
index 244b479344..3294b25128 100644
--- a/media/libopus/silk/fixed/structs_FIX.h
+++ b/media/libopus/silk/fixed/structs_FIX.h
@@ -116,6 +116,7 @@ typedef struct {
 typedef struct {
     silk_encoder_state_FIX      state_Fxx[ ENCODER_NUM_CHANNELS ];
     stereo_enc_state            sStereo;
+    opus_int32                  nBitsUsedLBRR;
     opus_int32                  nBitsExceeded;
     opus_int                    nChannelsAPI;
     opus_int                    nChannelsInternal;
diff --git a/media/libopus/silk/fixed/vector_ops_FIX.c b/media/libopus/silk/fixed/vector_ops_FIX.c
index 509c8b35a1..d94980014f 100644
--- a/media/libopus/silk/fixed/vector_ops_FIX.c
+++ b/media/libopus/silk/fixed/vector_ops_FIX.c
@@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 #include "SigProc_FIX.h"
+#include "pitch.h"
 
 /* Copy and multiply a vector by a constant */
 void silk_scale_copy_vector16(
@@ -70,18 +71,23 @@ void silk_scale_vector32_Q26_lshift_18(
 opus_int32 silk_inner_prod_aligned(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
-    const opus_int              len                 /*    I vector lengths                                              */
+    const opus_int              len,                /*    I vector lengths                                              */
+    int                         arch                /*    I Run-time architecture                                       */
 )
 {
+#ifdef FIXED_POINT
+   return celt_inner_prod(inVec1, inVec2, len, arch);
+#else
     opus_int   i;
     opus_int32 sum = 0;
     for( i = 0; i < len; i++ ) {
         sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
     }
     return sum;
+#endif
 }
 
-opus_int64 silk_inner_prod16_aligned_64(
+opus_int64 silk_inner_prod16_aligned_64_c(
     const opus_int16            *inVec1,            /*    I input vector 1                                              */
     const opus_int16            *inVec2,            /*    I input vector 2                                              */
     const opus_int              len                 /*    I vector lengths                                              */
diff --git a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c
index a4a579b10d..6ca6c1184d 100644
--- a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c
+++ b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c
@@ -34,6 +34,12 @@ POSSIBILITY OF SUCH DAMAGE.
 #define QC  10
 #define QS  14
 
+#if defined(MIPSr1_ASM)
+#include "mips/warped_autocorrelation_FIX_mipsr1.h"
+#endif
+
+
+#ifndef OVERRIDE_silk_warped_autocorrelation_FIX
 /* Autocorrelations for a warped frequency axis */
 void silk_warped_autocorrelation_FIX(
           opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
@@ -86,3 +92,4 @@ void silk_warped_autocorrelation_FIX(
     }
     silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
 }
+#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */
diff --git a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
new file mode 100644
index 0000000000..3756095fbe
--- /dev/null
+++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
@@ -0,0 +1,375 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "tuning_parameters.h"
+#include "pitch.h"
+#include "celt/x86/x86cpu.h"
+
+#define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA                          25
+#define N_BITS_HEAD_ROOM            2
+#define MIN_RSHIFTS                 -16
+#define MAX_RSHIFTS                 (32 - QA)
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified_sse4_1(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+    const opus_int16 *x_ptr;
+    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
+    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
+    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
+    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+
+    __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
+    __m128i CONST1 = _mm_set1_epi32(1);
+
+    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+    if( rshifts > MAX_RSHIFTS ) {
+        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+        silk_assert( C0 > 0 );
+        rshifts = MAX_RSHIFTS;
+    } else {
+        lz = silk_CLZ32( C0 ) - 1;
+        rshifts_extra = N_BITS_HEAD_ROOM - lz;
+        if( rshifts_extra > 0 ) {
+            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+            C0 = silk_RSHIFT32( C0, rshifts_extra );
+        } else {
+            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+            C0 = silk_LSHIFT32( C0, -rshifts_extra );
+        }
+        rshifts += rshifts_extra;
+    }
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+    if( rshifts > 0 ) {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
+            }
+        }
+    } else {
+        for( s = 0; s < nb_subfr; s++ ) {
+            int i;
+            opus_int32 d;
+            x_ptr = x + s * subfr_length;
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+            for( n = 1; n < D + 1; n++ ) {
+               for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+                  d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+               xcorr[ n - 1 ] += d;
+            }
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+            }
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        if( rshifts > -2 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp_QA = Af_QA[ k ];
+                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
+                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
+                }
+                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
+                }
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
+
+                X1_3210 = _mm_set1_epi32( x1 );
+                X2_3210 = _mm_set1_epi32( x2 );
+                TMP1_3210 = _mm_setzero_si128();
+                TMP2_3210 = _mm_setzero_si128();
+                for( k = 0; k < n - 3; k += 4 ) {
+                    PTR_3210   = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] );
+                    SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] );
+                    FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] );
+                    PTR_3210   = _mm_shuffle_epi32( PTR_3210,  _MM_SHUFFLE( 0, 1, 2, 3 ) );
+                    LAST_3210  = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] );
+                    ATMP_3210  = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] );
+
+                    T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
+                    T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
+
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 );
+                    ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 );
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 );
+
+                    FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 );
+                    LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 );
+
+                    PTR_3210   = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
+                    SUBFR_3210   = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
+
+                    _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 );
+                    _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 );
+
+                    TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 );
+                    TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 );
+                }
+
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) );
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) );
+
+                tmp1 += _mm_cvtsi128_si32( TMP1_3210 );
+                tmp2 += _mm_cvtsi128_si32( TMP2_3210 );
+
+                for( ; k < n; k++ ) {
+                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
+                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                }
+
+                tmp1 = -tmp1;                /* Q17 */
+                tmp2 = -tmp2;                /* Q17 */
+
+                {
+                    __m128i xmm_tmp1, xmm_tmp2;
+                    __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
+                    __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;
+
+                    xmm_tmp1 = _mm_set1_epi32( tmp1 );
+                    xmm_tmp2 = _mm_set1_epi32( tmp2 );
+
+                    for( k = 0; k <= n - 3; k += 4 ) {
+                        xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] );
+                        xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 );
+                        xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 );
+
+                        /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/
+                        xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+                        xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 );
+                        xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 );
+                        xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 );
+                        xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 );
+                        xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC );
+                        xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC );
+
+                        X1_3210  = _mm_loadu_si128( (__m128i *)&CAf[ k ] );
+                        PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] );
+
+                        X1_3210  = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 );
+                        PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 );
+
+                        _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 );
+                        _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 );
+                    }
+
+                    for( ; k <= n; k++ ) {
+                        CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
+                        CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                    }
+                }
+            }
+        }
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
+        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
+        num  = 0;                                                                                       /* Q( -rshifts ) */
+        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
+        for( k = 0; k < n; k++ ) {
+            Atmp_QA = Af_QA[ k ];
+            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+            lz = silk_min( 32 - QA, lz );
+            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
+
+            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
+        }
+        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
+        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
+        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
+        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        if( silk_abs( num ) < nrg ) {
+            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+        } else {
+            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+        }
+
+        /* Update inverse prediction gain */
+        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+        if( tmp1 <= minInvGain_Q30 ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
+            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
+            /* Newton-Raphson iteration */
+            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+            if( num < 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc_Q31 = -rc_Q31;
+            }
+            invGain_Q30 = minInvGain_Q30;
+            reached_max_gain = 1;
+        } else {
+            invGain_Q30 = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af_QA[ k ];                                                                  /* QA */
+            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
+            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
+            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
+        }
+        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af_QA[ k ] = 0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
+            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
+            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+        }
+    }
+
+    if( reached_max_gain ) {
+        for( k = 0; k < D; k++ ) {
+            /* Scale coefficients */
+            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        if( rshifts > 0 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts );
+            }
+        }
+        /* Approximate residual energy */
+        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+        *res_nrg_Q = -rshifts;
+    } else {
+        /* Return residual energy */
+        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
+        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
+        for( k = 0; k < D; k++ ) {
+            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
+            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
+            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
+            A_Q16[ k ] = -Atmp1;
+        }
+        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+        *res_nrg_Q = -rshifts;
+    }
+}
diff --git a/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c b/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c
new file mode 100644
index 0000000000..488a603f5d
--- /dev/null
+++ b/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c
@@ -0,0 +1,160 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+
+void silk_warped_LPC_analysis_filter_FIX_sse4_1(
+    opus_int32                  state[],                    /* I/O  State [order + 1]                   */
+    opus_int32                  res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+)
+{
+    opus_int     n, i;
+    opus_int32   acc_Q11, tmp1, tmp2;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    if (order == 10)
+    {
+        if (0 == lambda_Q16)
+        {
+            __m128i coef_Q13_3210, coef_Q13_7654;
+            __m128i coef_Q13_0123, coef_Q13_4567;
+            __m128i state_0123, state_4567;
+            __m128i xmm_product1, xmm_product2;
+            __m128i xmm_tempa, xmm_tempb;
+
+            register opus_int32 sum;
+            register opus_int32 state_8, state_9, state_a;
+            register opus_int64 coef_Q13_8, coef_Q13_9;
+
+            silk_assert( length > 0 );
+
+            coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] );
+            coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] );
+
+            coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+            coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+            coef_Q13_8 = (opus_int64) coef_Q13[ 8 ];
+            coef_Q13_9 = (opus_int64) coef_Q13[ 9 ];
+
+            state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) );
+            state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) );
+
+            state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+            state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+            state_8 = state[ 8 ];
+            state_9 = state[ 9 ];
+            state_a = 0;
+
+            for( n = 0; n < length; n++ )
+            {
+                xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */
+                xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 );
+
+                xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+                xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+                xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */
+                xmm_product2 = _mm_srli_epi64( xmm_product2, 16 );
+
+                xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa );
+                xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb );
+
+                xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 );
+                xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 );
+
+                xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 );
+                xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 );
+                xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb );
+
+                sum  = (coef_Q13_8 * state_8) >> 16;
+                sum += (coef_Q13_9 * state_9) >> 16;
+
+                xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
+                sum += _mm_cvtsi128_si32( xmm_tempa);
+                res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9);
+
+                /* move right */
+                state_a = state_9;
+                state_9 = state_8;
+                state_8 = _mm_cvtsi128_si32( state_4567 );
+                state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 );
+
+                state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 );
+            }
+
+            _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
+            _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
+            state[ 8 ] = state_8;
+            state[ 9 ] = state_9;
+            state[ 10 ] = state_a;
+
+            return;
+        }
+    }
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+        state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
+        /* Output of allpass section */
+        tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+        state[ 1 ] = tmp2;
+        acc_Q11 = silk_RSHIFT( order, 1 );
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+            state[ i ] = tmp1;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+            state[ i + 1 ] = tmp2;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+        }
+        state[ order ] = tmp1;
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+        res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+    }
+}
diff --git a/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c
new file mode 100644
index 0000000000..c1e90564d0
--- /dev/null
+++ b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+
+#include "SigProc_FIX.h"
+#include "pitch.h"
+
+opus_int64 silk_inner_prod16_aligned_64_sse4_1(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int  i, dataSize8;
+    opus_int64 sum;
+
+    __m128i xmm_tempa;
+    __m128i inVec1_76543210, acc1;
+    __m128i inVec2_76543210, acc2;
+
+    sum = 0;
+    dataSize8 = len & ~7;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for( i = 0; i < dataSize8; i += 8 ) {
+        inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) );
+        inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) );
+
+        /* only when all 4 operands are -32768 (0x8000), this results in wrap around */
+        inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 );
+
+        xmm_tempa       = _mm_cvtepi32_epi64( inVec1_76543210 );
+        /* equal shift right 8 bytes */
+        inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+        inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 );
+
+        acc1 = _mm_add_epi64( acc1, xmm_tempa );
+        acc2 = _mm_add_epi64( acc2, inVec1_76543210 );
+    }
+
+    acc1 = _mm_add_epi64( acc1, acc2 );
+
+    /* equal shift right 8 bytes */
+    acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+    acc1 = _mm_add_epi64( acc1, acc2 );
+
+    _mm_storel_epi64( (__m128i *)&sum, acc1 );
+
+    for( ; i < len; i++ ) {
+        sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+
+    return sum;
+}
diff --git a/media/libopus/silk/float/encode_frame_FLP.c b/media/libopus/silk/float/encode_frame_FLP.c
index d54e2686e5..2092a4d9e2 100644
--- a/media/libopus/silk/float/encode_frame_FLP.c
+++ b/media/libopus/silk/float/encode_frame_FLP.c
@@ -47,7 +47,7 @@ void silk_encode_do_VAD_FLP(
     /****************************/
     /* Voice Activity Detection */
     /****************************/
-    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
 
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
diff --git a/media/libopus/silk/float/find_LPC_FLP.c b/media/libopus/silk/float/find_LPC_FLP.c
index 61c1ad9554..fcfe1c3681 100644
--- a/media/libopus/silk/float/find_LPC_FLP.c
+++ b/media/libopus/silk/float/find_LPC_FLP.c
@@ -99,6 +99,6 @@ void silk_find_LPC_FLP(
         silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder );
     }
 
-    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || 
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 ||
         ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
 }
diff --git a/media/libopus/silk/float/find_pred_coefs_FLP.c b/media/libopus/silk/float/find_pred_coefs_FLP.c
index ea2c6c432a..1af4fe5f1b 100644
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c
+++ b/media/libopus/silk/float/find_pred_coefs_FLP.c
@@ -67,7 +67,8 @@ void silk_find_pred_coefs_FLP(
 
         /* Quantize LTP gain parameters */
         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr,
+            psEnc->sCmn.arch );
 
         /* Control LTP scaling */
         silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
@@ -90,13 +91,13 @@ void silk_find_pred_coefs_FLP(
         }
         silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
         psEncCtrl->LTPredCodGain = 0.0f;
-		psEnc->sCmn.sum_log_gain_Q7 = 0;
+        psEnc->sCmn.sum_log_gain_Q7 = 0;
     }
 
     /* Limit on total predictive coding gain */
     if( psEnc->sCmn.first_frame_after_reset ) {
         minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET;
-    } else {        
+    } else {
         minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) /  MAX_PREDICTION_POWER_GAIN;
         minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
     }
diff --git a/media/libopus/silk/float/main_FLP.h b/media/libopus/silk/float/main_FLP.h
index fb553b61aa..e5a75972e5 100644
--- a/media/libopus/silk/float/main_FLP.h
+++ b/media/libopus/silk/float/main_FLP.h
@@ -205,7 +205,8 @@ void silk_quant_LTP_gains_FLP(
     const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
     const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
     const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
-    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    int                             arch                                /* I    Run-time architecture                       */
 );
 
 /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
diff --git a/media/libopus/silk/float/pitch_analysis_core_FLP.c b/media/libopus/silk/float/pitch_analysis_core_FLP.c
index e58f041bd6..d0e637a29d 100644
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c
+++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c
@@ -182,8 +182,8 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
-        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
-                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +
+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) +
                      sf_length_8kHz * 4000.0f;
 
         C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
diff --git a/media/libopus/silk/float/structs_FLP.h b/media/libopus/silk/float/structs_FLP.h
index bb529e71a4..14d647ced2 100644
--- a/media/libopus/silk/float/structs_FLP.h
+++ b/media/libopus/silk/float/structs_FLP.h
@@ -115,6 +115,7 @@ typedef struct {
 typedef struct {
     silk_encoder_state_FLP      state_Fxx[ ENCODER_NUM_CHANNELS ];
     stereo_enc_state            sStereo;
+    opus_int32                  nBitsUsedLBRR;
     opus_int32                  nBitsExceeded;
     opus_int                    nChannelsAPI;
     opus_int                    nChannelsInternal;
diff --git a/media/libopus/silk/float/wrappers_FLP.c b/media/libopus/silk/float/wrappers_FLP.c
index 350599b20c..6666b8efaa 100644
--- a/media/libopus/silk/float/wrappers_FLP.c
+++ b/media/libopus/silk/float/wrappers_FLP.c
@@ -161,10 +161,10 @@ void silk_NSQ_wrapper_FLP(
     /* Call NSQ */
     if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
         silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
-            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch );
     } else {
         silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
-            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch );
     }
 }
 
@@ -179,7 +179,8 @@ void silk_quant_LTP_gains_FLP(
     const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
     const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
     const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
-    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    int                             arch                                /* I    Run-time architecture                       */
 )
 {
     opus_int   i;
@@ -193,7 +194,7 @@ void silk_quant_LTP_gains_FLP(
         W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );
     }
 
-    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch );
 
     for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
         B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );
diff --git a/media/libopus/silk/log2lin.c b/media/libopus/silk/log2lin.c
index a692e009db..b7c48e4740 100644
--- a/media/libopus/silk/log2lin.c
+++ b/media/libopus/silk/log2lin.c
@@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Approximation of 2^() (very close inverse of silk_lin2log()) */
 /* Convert input to a linear scale    */
-opus_int32 silk_log2lin( 
+opus_int32 silk_log2lin(
     const opus_int32            inLog_Q7            /* I  input on log scale                                            */
 )
 {
@@ -42,8 +42,8 @@ opus_int32 silk_log2lin(
     if( inLog_Q7 < 0 ) {
         return 0;
     } else if ( inLog_Q7 >= 3967 ) {
-		return silk_int32_MAX;
-	}
+        return silk_int32_MAX;
+    }
 
     out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
     frac_Q7 = inLog_Q7 & 0x7F;
diff --git a/media/libopus/silk/macros.h b/media/libopus/silk/macros.h
index a84e5a5d3e..bc30303466 100644
--- a/media/libopus/silk/macros.h
+++ b/media/libopus/silk/macros.h
@@ -35,19 +35,42 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "opus_types.h"
 #include "opus_defines.h"
 
+#if OPUS_GNUC_PREREQ(3, 0)
+#define opus_likely(x)       (__builtin_expect(!!(x), 1))
+#define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
+#else
+#define opus_likely(x)       (!!(x))
+#define opus_unlikely(x)     (!!(x))
+#endif
+
+/* Set this if opus_int64 is a native type of the CPU. */
+#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64))
+
 /* This is an OPUS_INLINE header file for general platform. */
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#if OPUS_FAST_INT64
+#define silk_SMULWB(a32, b32)            (((a32) * (opus_int64)((opus_int16)(b32))) >> 16)
+#else
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
+#endif
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#if OPUS_FAST_INT64
+#define silk_SMLAWB(a32, b32, c32)       ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))
+#else
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
+#endif
 
 /* (a32 * (b32 >> 16)) >> 16 */
 #define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
+#if OPUS_FAST_INT64
+#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))
+#else
 #define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
+#endif
 
 /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
 #define silk_SMULBB(a32, b32)            ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
@@ -65,10 +88,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #define silk_SMLAL(a64, b32, c32)        (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
 
 /* (a32 * b32) >> 16 */
+#if OPUS_FAST_INT64
+#define silk_SMULWW(a32, b32)            (((opus_int64)(a32) * (b32)) >> 16)
+#else
 #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
+#endif
 
 /* a32 + ((b32 * c32) >> 16) */
+#if OPUS_FAST_INT64
+#define silk_SMLAWW(a32, b32, c32)       ((a32) + (((opus_int64)(b32) * (c32)) >> 16))
+#else
 #define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
+#endif
 
 /* add/subtract with output saturated */
 #define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
@@ -79,17 +110,24 @@ POSSIBILITY OF SUCH DAMAGE.
                                         (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
                                         ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
 
-#include "ecintrin.h"
+#if defined(MIPSr1_ASM)
+#include "mips/macros_mipsr1.h"
+#endif
 
+#include "ecintrin.h"
+#ifndef OVERRIDE_silk_CLZ16
 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
 {
     return 32 - EC_ILOG(in16<<16|0x8000);
 }
+#endif
 
+#ifndef OVERRIDE_silk_CLZ32
 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
 {
     return in32 ? 32 - EC_ILOG(in32) : 32;
 }
+#endif
 
 /* Row based */
 #define matrix_ptr(Matrix_base_adr, row, column, N) \
diff --git a/media/libopus/silk/main.h b/media/libopus/silk/main.h
index 2bdf89784d..2f90d68f7d 100644
--- a/media/libopus/silk/main.h
+++ b/media/libopus/silk/main.h
@@ -38,6 +38,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "entenc.h"
 #include "entdec.h"
 
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#include "x86/main_sse.h"
+#endif
+
 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
 void silk_stereo_LR_to_MS(
     stereo_enc_state            *state,                         /* I/O  State                                       */
@@ -116,7 +120,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -161,7 +165,7 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 );
@@ -204,15 +208,16 @@ void silk_quant_LTP_gains(
     opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
-	opus_int32					*sum_gain_dB_Q7,							/* I/O  Cumulative max prediction gain  */
+    opus_int32                  *sum_gain_dB_Q7,                            /* I/O  Cumulative max prediction gain  */
     const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
     opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
     opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
-    const opus_int              nb_subfr                                    /* I    number of subframes             */
+    const opus_int              nb_subfr,                                   /* I    number of subframes             */
+    int                         arch                                        /* I    Run-time architecture           */
 );
 
 /* Entropy constrained matrix-weighted VQ, for a single input data vector */
-void silk_VQ_WMat_EC(
+void silk_VQ_WMat_EC_c(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
@@ -226,10 +231,18 @@ void silk_VQ_WMat_EC(
     opus_int                    L                               /* I    number of vectors in codebook               */
 );
 
+#if !defined(OVERRIDE_silk_VQ_WMat_EC)
+#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L, arch) \
+    ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L))
+#endif
+
 /************************************/
 /* Noise shaping quantization (NSQ) */
 /************************************/
-void silk_NSQ(
+
+void silk_NSQ_c(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -247,8 +260,15 @@ void silk_NSQ(
     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
 );
 
+#if !defined(OVERRIDE_silk_NSQ)
+#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+#endif
+
 /* Noise shaping using delayed decision */
-void silk_NSQ_del_dec(
+void silk_NSQ_del_dec_c(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -266,6 +286,13 @@ void silk_NSQ_del_dec(
     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
 );
 
+#if !defined(OVERRIDE_silk_NSQ_del_dec)
+#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+#endif
+
 /************/
 /* Silk VAD */
 /************/
@@ -275,11 +302,15 @@ opus_int silk_VAD_Init(                                         /* O    Return v
 );
 
 /* Get speech activity level in Q8 */
-opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+opus_int silk_VAD_GetSA_Q8_c(                                   /* O    Return value, 0 if success                  */
     silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
     const opus_int16            pIn[]                           /* I    PCM input                                   */
 );
 
+#if !defined(OVERRIDE_silk_VAD_GetSA_Q8)
+#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_c(psEnC, pIn))
+#endif
+
 /* Low-pass filter with variable cutoff frequency based on  */
 /* piece-wise linear interpolation between elliptic filters */
 /* Start by setting transition_frame_no = 1;                */
@@ -373,7 +404,8 @@ opus_int silk_decode_frame(
     opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
     opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
     opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
-    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+    opus_int                    condCoding,                     /* I    The type of conditional coding to use       */
+    int                         arch                            /* I    Run-time architecture                       */
 );
 
 /* Decode indices from bitstream */
@@ -397,13 +429,14 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ],     /* I    Pulse signal                                */
+    int                         arch                            /* I    Run-time architecture                       */
 );
 
 /* Decode quantization indices of excitation (Shell coding) */
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
diff --git a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
new file mode 100644
index 0000000000..f6afd923e8
--- /dev/null
+++ b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
@@ -0,0 +1,405 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef __NSQ_DEL_DEC_MIPSR1_H__
+#define __NSQ_DEL_DEC_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+#define OVERRIDE_silk_noise_shape_quantizer_del_dec
+static inline void silk_noise_shape_quantizer_del_dec(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+)
+{
+    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+    opus_int32   Winner_rand_state;
+    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+    NSQ_sample_struct  psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];
+    NSQ_del_dec_struct *psDD;
+    NSQ_sample_struct  *psSS;
+    opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4;
+    opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6;
+    opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13;
+    opus_int16 a_Q12_14, a_Q12_15;
+
+    opus_int32 cur, prev, next;
+
+    //Intialize b_Q14 variables
+    b_Q14_0 = b_Q14[ 0 ];
+    b_Q14_1 = b_Q14[ 1 ];
+    b_Q14_2 = b_Q14[ 2 ];
+    b_Q14_3 = b_Q14[ 3 ];
+    b_Q14_4 = b_Q14[ 4 ];
+
+    //Intialize a_Q12 variables
+    a_Q12_0 = a_Q12[0];
+    a_Q12_1 = a_Q12[1];
+    a_Q12_2 = a_Q12[2];
+    a_Q12_3 = a_Q12[3];
+    a_Q12_4 = a_Q12[4];
+    a_Q12_5 = a_Q12[5];
+    a_Q12_6 = a_Q12[6];
+    a_Q12_7 = a_Q12[7];
+    a_Q12_8 = a_Q12[8];
+    a_Q12_9 = a_Q12[9];
+    a_Q12_10 = a_Q12[10];
+    a_Q12_11 = a_Q12[11];
+    a_Q12_12 = a_Q12[12];
+    a_Q12_13 = a_Q12[13];
+    a_Q12_14 = a_Q12[14];
+    a_Q12_15 = a_Q12[15];
+
+    long long temp64;
+
+    silk_assert( nStatesDelayedDecision > 0 );
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    for( i = 0; i < length; i++ ) {
+        /* Perform common calculations used in all states */
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 );
+            temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 );
+            temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 );
+            temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 );
+            temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 );
+            temp64 += 32768;
+            LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
+            LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
+            pred_lag_ptr++;
+        } else {
+            LTP_pred_Q14 = 0;
+        }
+
+        /* Long-term shaping */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
+            shp_lag_ptr++;
+        } else {
+            n_LTP_Q14 = 0;
+        }
+
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            /* Delayed decision state */
+            psDD = &psDelDec[ k ];
+
+            /* Sample state */
+            psSS = psSampleState[ k ];
+
+            /* Generate dither */
+            psDD->Seed = silk_RAND( psDD->Seed );
+
+            /* Pointer used in short term prediction and shaping */
+            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+            /* Short-term prediction */
+            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+            temp64 = __builtin_mips_mult(psLPC_Q14[  0 ], a_Q12_0 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 );
+            temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 );
+            if( predictLPCOrder == 16 ) {
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 );
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 );
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 );
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 );
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 );
+                temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -15 ], a_Q12_15 );
+            }
+            temp64 += 32768;
+            LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
+
+            LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
+
+            /* Noise shape feedback */
+            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+            /* Output of lowpass section */
+            tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+            psDD->sAR2_Q14[ 0 ] = tmp2;
+
+            temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] );
+
+            prev = psDD->sAR2_Q14[ 1 ];
+
+            /* Loop over allpass sections */
+            for( j = 2; j < shapingLPCOrder; j += 2 ) {
+                cur = psDD->sAR2_Q14[ j ];
+                next = psDD->sAR2_Q14[ j+1 ];
+                /* Output of allpass section */
+                tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 );
+                psDD->sAR2_Q14[ j - 1 ] = tmp1;
+                temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] );
+                temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] );
+                /* Output of allpass section */
+                tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 );
+                psDD->sAR2_Q14[ j + 0 ] = tmp2;
+                prev = next;
+            }
+            psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+            temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+            temp64 += 32768;
+            n_AR_Q14 = __builtin_mips_extr_w(temp64, 16);
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
+            n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
+            n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            /* Input minus prediction plus noise feedback                       */
+            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
+            tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
+            tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
+            tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
+
+            r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
+
+            /* Flip sign depending on dither */
+            if ( psDD->Seed < 0 ) {
+                r_Q10 = -r_Q10;
+            }
+            r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+            /* Find two quantization level candidates and measure their rate-distortion */
+            q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+            q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+            if( q1_Q0 > 0 ) {
+                q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == 0 ) {
+                q1_Q10  = offset_Q10;
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == -1 ) {
+                q2_Q10  = offset_Q10;
+                q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+            } else {            /* q1_Q0 < -1 */
+                q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+            }
+            rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+            rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+            rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+            rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+            if( rd1_Q10 < rd2_Q10 ) {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 0 ].Q_Q10  = q1_Q10;
+                psSS[ 1 ].Q_Q10  = q2_Q10;
+            } else {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 0 ].Q_Q10  = q2_Q10;
+                psSS[ 1 ].Q_Q10  = q1_Q10;
+            }
+
+            /* Update states for best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 0 ].xq_Q14       = xq_Q14;
+
+            /* Update states for second best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 1 ].xq_Q14       = xq_Q14;
+        }
+
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
+
+        /* Find winner */
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+        Winner_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                Winner_ind = k;
+            }
+        }
+
+        /* Increase RD values of expired states */
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+            }
+        }
+
+        /* Find worst in first set and best in second set */
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
+        RDmax_ind = 0;
+        RDmin_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            /* find worst in first set */
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                RDmax_ind = k;
+            }
+            /* find best in second set */
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
+                RDmin_ind = k;
+            }
+        }
+
+        /* Replace a state if best from second set outperforms worst in first set */
+        if( RDmin_Q10 < RDmax_Q10 ) {
+            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+        }
+
+        /* Write samples from winner to output and long-term filter states */
+        psDD = &psDelDec[ Winner_ind ];
+        if( subfr > 0 || i >= decisionDelay ) {
+            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
+        }
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Update states */
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD                                     = &psDelDec[ k ];
+            psSS                                     = &psSampleState[ k ][ 0 ];
+            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
+            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
+            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
+            psDD->RD_Q10                             = psSS->RD_Q10;
+        }
+        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
+    }
+    /* Update LPC states */
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
+        psDD = &psDelDec[ k ];
+        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    }
+}
+
+#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */
diff --git a/media/libopus/silk/mips/macros_mipsr1.h b/media/libopus/silk/mips/macros_mipsr1.h
new file mode 100644
index 0000000000..12ed981a6e
--- /dev/null
+++ b/media/libopus/silk/mips/macros_mipsr1.h
@@ -0,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+#ifndef __SILK_MACROS_MIPSR1_H__
+#define __SILK_MACROS_MIPSR1_H__
+
+#define mips_clz(x) __builtin_clz(x)
+
+#undef silk_SMULWB
+static inline int silk_SMULWB(int a, int b)
+{
+    long long ac;
+    int c;
+
+    ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b);
+    c = __builtin_mips_extr_w(ac, 16);
+
+    return c;
+}
+
+#undef silk_SMLAWB
+#define silk_SMLAWB(a32, b32, c32)       ((a32) + silk_SMULWB(b32, c32))
+
+#undef silk_SMULWW
+static inline int silk_SMULWW(int a, int b)
+{
+    long long ac;
+    int c;
+
+    ac = __builtin_mips_mult(a, b);
+    c = __builtin_mips_extr_w(ac, 16);
+
+    return c;
+}
+
+#undef silk_SMLAWW
+static inline int silk_SMLAWW(int a, int b, int c)
+{
+    long long ac;
+    int res;
+
+    ac = __builtin_mips_mult(b, c);
+    res = __builtin_mips_extr_w(ac, 16);
+    res += a;
+
+    return res;
+}
+
+#define OVERRIDE_silk_CLZ16
+static inline opus_int32 silk_CLZ16(opus_int16 in16)
+{
+    int re32;
+    opus_int32 in32 = (opus_int32 )in16;
+    re32 = mips_clz(in32);
+    re32-=16;
+    return re32;
+}
+
+#define OVERRIDE_silk_CLZ32
+static inline opus_int32 silk_CLZ32(opus_int32 in32)
+{
+    int re32;
+    re32 = mips_clz(in32);
+    return re32;
+}
+
+#endif /* __SILK_MACROS_MIPSR1_H__ */
diff --git a/media/libopus/silk/mips/sigproc_fix_mipsr1.h b/media/libopus/silk/mips/sigproc_fix_mipsr1.h
new file mode 100644
index 0000000000..3b0a695365
--- /dev/null
+++ b/media/libopus/silk/mips/sigproc_fix_mipsr1.h
@@ -0,0 +1,65 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_MIPSR1_H
+#define SILK_SIGPROC_FIX_MIPSR1_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+#undef silk_SAT16
+static inline short int silk_SAT16(int a)
+{
+    int c;
+    c = __builtin_mips_shll_s_w(a, 16);
+    c = c>>16;
+
+    return c;
+}
+
+#undef silk_LSHIFT_SAT32
+static inline int silk_LSHIFT_SAT32(int a, int shift)
+{
+    int r;
+
+    r = __builtin_mips_shll_s_w(a, shift);
+
+    return r;
+}
+
+#undef silk_RSHIFT_ROUND
+static inline int silk_RSHIFT_ROUND(int a, int shift)
+{
+    int r;
+
+    r = __builtin_mips_shra_r_w(a, shift);
+    return r;
+}
+
+#endif /* SILK_SIGPROC_FIX_MIPSR1_H */
diff --git a/media/libopus/silk/quant_LTP_gains.c b/media/libopus/silk/quant_LTP_gains.c
index fd0870da19..513a8c4468 100644
--- a/media/libopus/silk/quant_LTP_gains.c
+++ b/media/libopus/silk/quant_LTP_gains.c
@@ -36,11 +36,12 @@ void silk_quant_LTP_gains(
     opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
-	opus_int32					*sum_log_gain_Q7,							/* I/O  Cumulative max prediction gain  */
+    opus_int32                  *sum_log_gain_Q7,                           /* I/O  Cumulative max prediction gain  */
     const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
     opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
     opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
-    const opus_int              nb_subfr                                    /* I    number of subframes             */
+    const opus_int              nb_subfr,                                   /* I    number of subframes             */
+    int                         arch                                        /* I    Run-time architecture           */
 )
 {
     opus_int             j, k, cbk_size;
@@ -51,7 +52,7 @@ void silk_quant_LTP_gains(
     const opus_int16     *b_Q14_ptr;
     const opus_int32     *W_Q18_ptr;
     opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
-	opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
+    opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
 
     /***************************************************/
     /* iterate over different codebooks with different */
@@ -74,23 +75,24 @@ void silk_quant_LTP_gains(
         b_Q14_ptr = B_Q14;
 
         rate_dist_Q14 = 0;
-		sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
+        sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
         for( j = 0; j < nb_subfr; j++ ) {
-			max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) 
-										+ SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+            max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )
+                                        + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
 
             silk_VQ_WMat_EC(
                 &temp_idx[ j ],         /* O    index of best codebook vector                           */
                 &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */
-				&gain_Q7,               /* O    sum of absolute LTP coefficients                        */
+                &gain_Q7,               /* O    sum of absolute LTP coefficients                        */
                 b_Q14_ptr,              /* I    input vector to be quantized                            */
                 W_Q18_ptr,              /* I    weighting matrix                                        */
                 cbk_ptr_Q7,             /* I    codebook                                                */
                 cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */
                 cl_ptr_Q5,              /* I    code length for each codebook vector                    */
                 mu_Q9,                  /* I    tradeoff between weighted error and rate                */
-				max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
-                cbk_size                /* I    number of vectors in codebook                           */
+                max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
+                cbk_size,               /* I    number of vectors in codebook                           */
+                arch                    /* I    Run-time architecture                                   */
             );
 
             rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
@@ -108,7 +110,7 @@ void silk_quant_LTP_gains(
             min_rate_dist_Q14 = rate_dist_Q14;
             *periodicity_index = (opus_int8)k;
             silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
-			best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
+            best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
         }
 
         /* Break early in low-complexity mode if rate distortion is below threshold */
@@ -123,6 +125,5 @@ void silk_quant_LTP_gains(
             B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
         }
     }
-	*sum_log_gain_Q7 = best_sum_log_gain_Q7;
+    *sum_log_gain_Q7 = best_sum_log_gain_Q7;
 }
-
diff --git a/media/libopus/silk/resampler_rom.c b/media/libopus/silk/resampler_rom.c
index 2d502706f9..5e6b04476a 100644
--- a/media/libopus/silk/resampler_rom.c
+++ b/media/libopus/silk/resampler_rom.c
@@ -41,36 +41,36 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
-	-20694, -13867,
-	   -49,     64,     17,   -157,    353,   -496,    163,  11047,  22205,
-	   -39,      6,     91,   -170,    186,     23,   -896,   6336,  19928,
-	   -19,    -36,    102,    -89,    -24,    328,   -951,   2568,  15909,
+    -20694, -13867,
+       -49,     64,     17,   -157,    353,   -496,    163,  11047,  22205,
+       -39,      6,     91,   -170,    186,     23,   -896,   6336,  19928,
+       -19,    -36,    102,    -89,    -24,    328,   -951,   2568,  15909,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
-	-14457, -14019,
-	    64,    128,   -122,     36,    310,   -768,    584,   9267,  17733,
-	    12,    128,     18,   -142,    288,   -117,   -865,   4123,  14459,
+    -14457, -14019,
+        64,    128,   -122,     36,    310,   -768,    584,   9267,  17733,
+        12,    128,     18,   -142,    288,   -117,   -865,   4123,  14459,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = {
-	   616, -14323,
-	   -10,     39,     58,    -46,    -84,    120,    184,   -315,   -541,   1284,   5380,   9024,
+       616, -14323,
+       -10,     39,     58,    -46,    -84,    120,    184,   -315,   -541,   1284,   5380,   9024,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
-	 16102, -15162,
-	   -13,      0,     20,     26,      5,    -31,    -43,     -4,     65,     90,      7,   -157,   -248,    -44,    593,   1583,   2612,   3271,
+     16102, -15162,
+       -13,      0,     20,     26,      5,    -31,    -43,     -4,     65,     90,      7,   -157,   -248,    -44,    593,   1583,   2612,   3271,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
-	 22500, -15099,
-	     3,    -14,    -20,    -15,      2,     25,     37,     25,    -16,    -71,   -107,    -79,     50,    292,    623,    982,   1288,   1464,
+     22500, -15099,
+         3,    -14,    -20,    -15,      2,     25,     37,     25,    -16,    -71,   -107,    -79,     50,    292,    623,    982,   1288,   1464,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
-	 27540, -15257,
-	    17,     12,      8,      1,    -10,    -22,    -30,    -32,    -22,      3,     44,    100,    168,    243,    317,    381,    429,    455,
+     27540, -15257,
+        17,     12,      8,      1,    -10,    -22,    -30,    -32,    -22,      3,     44,    100,    168,    243,    317,    381,    429,    455,
 };
 
 silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = {
@@ -81,16 +81,16 @@ silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = {
 
 /* Table with interplation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (46 Words) */
 silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = {
-	{  189,  -600,   617, 30567 },
-	{  117,  -159, -1070, 29704 },
-	{   52,   221, -2392, 28276 },
-	{   -4,   529, -3350, 26341 },
-	{  -48,   758, -3956, 23973 },
-	{  -80,   905, -4235, 21254 },
-	{  -99,   972, -4222, 18278 },
-	{ -107,   967, -3957, 15143 },
-	{ -103,   896, -3487, 11950 },
-	{  -91,   773, -2865,  8798 },
-	{  -71,   611, -2143,  5784 },
-	{  -46,   425, -1375,  2996 },
+    {  189,  -600,   617, 30567 },
+    {  117,  -159, -1070, 29704 },
+    {   52,   221, -2392, 28276 },
+    {   -4,   529, -3350, 26341 },
+    {  -48,   758, -3956, 23973 },
+    {  -80,   905, -4235, 21254 },
+    {  -99,   972, -4222, 18278 },
+    { -107,   967, -3957, 15143 },
+    { -103,   896, -3487, 11950 },
+    {  -91,   773, -2865,  8798 },
+    {  -71,   611, -2143,  5784 },
+    {  -46,   425, -1375,  2996 },
 };
diff --git a/media/libopus/silk/shell_coder.c b/media/libopus/silk/shell_coder.c
index 796f57d6c2..4af341474b 100644
--- a/media/libopus/silk/shell_coder.c
+++ b/media/libopus/silk/shell_coder.c
@@ -58,8 +58,8 @@ static OPUS_INLINE void encode_split(
 }
 
 static OPUS_INLINE void decode_split(
-    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
-    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
+    opus_int16                  *p_child1,      /* O    pulse amplitude of first child subframe     */
+    opus_int16                  *p_child2,      /* O    pulse amplitude of second child subframe    */
     ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
     const opus_int              p,              /* I    pulse amplitude of current subframe         */
     const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
@@ -117,12 +117,12 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 )
 {
-    opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+    opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
 
     /* this function operates on one shell code frame of 16 pulses */
     silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
diff --git a/media/libopus/silk/structs.h b/media/libopus/silk/structs.h
index 1826b36a80..827829dc6f 100644
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@@ -171,7 +171,7 @@ typedef struct {
     opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */
     opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */
     opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */
-    opus_int32                   sum_log_gain_Q7;					/* Cumulative max prediction gain									*/
+    opus_int32                   sum_log_gain_Q7;                   /* Cumulative max prediction gain                                   */
     opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */
     opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */
     opus_int                     controlled_since_last_payload;     /* Flag for ensuring codec_control only runs once per packet        */
diff --git a/media/libopus/silk/sum_sqr_shift.c b/media/libopus/silk/sum_sqr_shift.c
index 12514c9917..129df191d8 100644
--- a/media/libopus/silk/sum_sqr_shift.c
+++ b/media/libopus/silk/sum_sqr_shift.c
@@ -53,6 +53,7 @@ void silk_sum_sqr_shift(
             /* Scale down */
             nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
             shft = 2;
+            i+=2;
             break;
         }
     }
diff --git a/media/libopus/silk/tables.h b/media/libopus/silk/tables.h
index a91431e854..7fea6fda39 100644
--- a/media/libopus/silk/tables.h
+++ b/media/libopus/silk/tables.h
@@ -47,8 +47,8 @@ extern const opus_uint8  silk_pitch_contour_NB_iCDF[ 11 ];
 extern const opus_uint8  silk_pitch_contour_10_ms_iCDF[ 12 ];                                       /*  12 */
 extern const opus_uint8  silk_pitch_contour_10_ms_NB_iCDF[ 3 ];                                     /*   3 */
 
-extern const opus_uint8  silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ];             /* 180 */
-extern const opus_uint8  silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ];      /* 162 */
+extern const opus_uint8  silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ SILK_MAX_PULSES + 2 ];        /* 180 */
+extern const opus_uint8  silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ SILK_MAX_PULSES + 2 ]; /* 162 */
 
 extern const opus_uint8  silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ];                           /*  18 */
 extern const opus_uint8  silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ];                        /*  18 */
@@ -59,7 +59,7 @@ extern const opus_uint8  silk_shell_code_table0[ 152 ];
 extern const opus_uint8  silk_shell_code_table1[ 152 ];                                             /* 152 */
 extern const opus_uint8  silk_shell_code_table2[ 152 ];                                             /* 152 */
 extern const opus_uint8  silk_shell_code_table3[ 152 ];                                             /* 152 */
-extern const opus_uint8  silk_shell_code_table_offsets[ MAX_PULSES + 1 ];                           /*  17 */
+extern const opus_uint8  silk_shell_code_table_offsets[ SILK_MAX_PULSES + 1 ];                      /*  17 */
 
 extern const opus_uint8  silk_lsb_iCDF[ 2 ];                                                        /*   2 */
 
diff --git a/media/libopus/silk/tuning_parameters.h b/media/libopus/silk/tuning_parameters.h
index e1057bbaae..5b8f404235 100644
--- a/media/libopus/silk/tuning_parameters.h
+++ b/media/libopus/silk/tuning_parameters.h
@@ -64,7 +64,7 @@ extern "C"
 #define MU_LTP_QUANT_WB                                 0.02f
 
 /* Max cumulative LTP gain */
-#define MAX_SUM_LOG_GAIN_DB								250.0f
+#define MAX_SUM_LOG_GAIN_DB                             250.0f
 
 /***********************/
 /* High pass filtering */
diff --git a/media/libopus/silk/x86/NSQ_del_dec_sse.c b/media/libopus/silk/x86/NSQ_del_dec_sse.c
new file mode 100644
index 0000000000..21d4a8bc1e
--- /dev/null
+++ b/media/libopus/silk/x86/NSQ_del_dec_sse.c
@@ -0,0 +1,857 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+
+#include "stack_alloc.h"
+
+typedef struct {
+    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+    opus_int32 RandState[ DECISION_DELAY ];
+    opus_int32 Q_Q10[     DECISION_DELAY ];
+    opus_int32 Xq_Q14[    DECISION_DELAY ];
+    opus_int32 Pred_Q15[  DECISION_DELAY ];
+    opus_int32 Shape_Q14[ DECISION_DELAY ];
+    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32 LF_AR_Q14;
+    opus_int32 Seed;
+    opus_int32 SeedInit;
+    opus_int32 RD_Q10;
+} NSQ_del_dec_struct;
+
+typedef struct {
+    opus_int32 Q_Q10;
+    opus_int32 RD_Q10;
+    opus_int32 xq_Q14;
+    opus_int32 LF_AR_Q14;
+    opus_int32 sLTP_shp_Q14;
+    opus_int32 LPC_exc_Q14;
+} NSQ_sample_struct;
+
+typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+);
+
+void silk_NSQ_del_dec_sse4_1(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    opus_int32          RDmin_Q10, Gain_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    VARDECL( opus_int32, delayedGain_Q10 );
+    VARDECL( NSQ_del_dec_struct, psDelDec );
+    NSQ_del_dec_struct  *psDD;
+    SAVE_STACK;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    /* Initialize delayed decision states */
+    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
+    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
+    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
+        psDD                 = &psDelDec[ k ];
+        psDD->Seed           = ( k + psIndices->Seed ) & 3;
+        psDD->SeedInit       = psDD->Seed;
+        psDD->RD_Q10         = 0;
+        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
+        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
+        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
+    }
+
+    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+    smpl_buf_idx = 0; /* index of oldest samples */
+
+    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );
+
+    /* For voiced frames limit the decision delay to lower than the pitch lag */
+    if( psIndices->signalType == TYPE_VOICED ) {
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
+        }
+    } else {
+        if( lag > 0 ) {
+            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
+        }
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    subfr = 0;
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                if( k == 2 ) {
+                    /* RESET DELAYED DECISIONS */
+                    /* Find winner */
+                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+                    Winner_ind = 0;
+                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
+                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
+                            Winner_ind = i;
+                        }
+                    }
+                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( i != Winner_ind ) {
+                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
+                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
+                        }
+                    }
+
+                    /* Copy final part of signals from winner state to output and long-term filter states */
+                    psDD = &psDelDec[ Winner_ind ];
+                    last_smple_idx = smpl_buf_idx + decisionDelay;
+                    for( i = 0; i < decisionDelay; i++ ) {
+                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
+                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+                    }
+
+                    subfr = 0;
+                }
+
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
+
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+                NSQ->rewhite_flag = 1;
+            }
+        }
+
+        silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
+            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
+
+        silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
+            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Find winner */
+    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+    Winner_ind = 0;
+    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
+        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
+            RDmin_Q10 = psDelDec[ k ].RD_Q10;
+            Winner_ind = k;
+        }
+    }
+
+    /* Copy final part of signals from winner state to output and long-term filter states */
+    psDD = &psDelDec[ Winner_ind ];
+    psIndices->Seed = psDD->SeedInit;
+    last_smple_idx = smpl_buf_idx + decisionDelay;
+    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
+    for( i = 0; i < decisionDelay; i++ ) {
+        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+    }
+    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
+
+    /* Update states */
+    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
+    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech signal */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+)
+{
+    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+    opus_int32   Winner_rand_state;
+    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+    VARDECL( NSQ_sample_pair, psSampleState );
+    NSQ_del_dec_struct *psDD;
+    NSQ_sample_struct  *psSS;
+
+    __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF;
+    __m128i b_Q12_0123, b_sr_Q12_0123;
+    SAVE_STACK;
+
+    silk_assert( nStatesDelayedDecision > 0 );
+    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 );
+    a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 );
+
+    if( opus_likely( predictLPCOrder == 16 ) ) {
+        a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 );
+        a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 );
+    }
+
+    if( signalType == TYPE_VOICED ){
+        b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 );
+        b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+    }
+    for( i = 0; i < length; i++ ) {
+        /* Perform common calculations used in all states */
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q14 = 2;
+            {
+                __m128i tmpa, tmpb, pred_lag_ptr_tmp;
+                pred_lag_ptr_tmp    = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                pred_lag_ptr_tmp    = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
+                tmpa                = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 );
+                tmpa                = _mm_srli_si128( tmpa, 2 );
+
+                pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */
+                pred_lag_ptr_tmp    = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 );
+                pred_lag_ptr_tmp    = _mm_srli_si128( pred_lag_ptr_tmp, 2 );
+                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpa );
+
+                tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */
+                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpb );
+                LTP_pred_Q14        += _mm_cvtsi128_si32( pred_lag_ptr_tmp );
+
+                LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+                LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
+                pred_lag_ptr++;
+            }
+        } else {
+            LTP_pred_Q14 = 0;
+        }
+
+        /* Long-term shaping */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
+            shp_lag_ptr++;
+        } else {
+            n_LTP_Q14 = 0;
+        }
+        {
+            __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp;
+
+            for( k = 0; k < nStatesDelayedDecision; k++ ) {
+                /* Delayed decision state */
+                psDD = &psDelDec[ k ];
+
+                /* Sample state */
+                psSS = psSampleState[ k ];
+
+                /* Generate dither */
+                psDD->Seed = silk_RAND( psDD->Seed );
+
+                /* Pointer used in short term prediction and shaping */
+                psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+                /* Short-term prediction */
+                silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+                /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+                LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
+
+                tmpb = _mm_setzero_si128();
+
+                /* step 1 */
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
+                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );      /* 0, -1, -2, -3 */
+                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );    /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
+
+                tmpa            = _mm_srli_epi64( tmpa, 16 );
+                tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */
+                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                /* step 2 */
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
+                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
+                tmpa            = _mm_srli_epi64( tmpa, 16 );
+                tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                if ( opus_likely( predictLPCOrder == 16 ) )
+                {
+                    /* step 3 */
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) );
+                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB );
+                    tmpa            = _mm_srli_epi64( tmpa, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */
+                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                    /* setp 4 */
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) );
+                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF );
+                    tmpa            = _mm_srli_epi64( tmpa, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                    /* add at last */
+                    /* equal shift right 8 bytes*/
+                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
+                }
+                else
+                {
+                    /* add at last */
+                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
+
+                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+                }
+
+                LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
+
+                /* Noise shape feedback */
+                silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+                /* Output of lowpass section */
+                tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+                /* Output of allpass section */
+                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+                psDD->sAR2_Q14[ 0 ] = tmp2;
+                n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
+                /* Loop over allpass sections */
+                for( j = 2; j < shapingLPCOrder; j += 2 ) {
+                    /* Output of allpass section */
+                    tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
+                    psDD->sAR2_Q14[ j - 1 ] = tmp1;
+                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
+                    /* Output of allpass section */
+                    tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
+                    psDD->sAR2_Q14[ j + 0 ] = tmp2;
+                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
+                }
+                psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
+                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
+
+                n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
+                n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
+                n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
+
+                /* Input minus prediction plus noise feedback                       */
+                /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
+                tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
+                tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
+                tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
+                tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
+
+                r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
+
+                /* Flip sign depending on dither */
+                if ( psDD->Seed < 0 ) {
+                    r_Q10 = -r_Q10;
+                }
+                r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+                /* Find two quantization level candidates and measure their rate-distortion */
+                q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+                q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+                if( q1_Q0 > 0 ) {
+                    q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+                } else if( q1_Q0 == 0 ) {
+                    q1_Q10  = offset_Q10;
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+                } else if( q1_Q0 == -1 ) {
+                    q2_Q10  = offset_Q10;
+                    q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+                } else {            /* q1_Q0 < -1 */
+                    q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+                }
+                rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+                rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+                rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+                rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+                if( rd1_Q10 < rd2_Q10 ) {
+                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                    psSS[ 0 ].Q_Q10  = q1_Q10;
+                    psSS[ 1 ].Q_Q10  = q2_Q10;
+                } else {
+                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                    psSS[ 0 ].Q_Q10  = q2_Q10;
+                    psSS[ 1 ].Q_Q10  = q1_Q10;
+                }
+
+                /* Update states for best quantization */
+
+                /* Quantized excitation */
+                exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+                if ( psDD->Seed < 0 ) {
+                    exc_Q14 = -exc_Q14;
+                }
+
+                /* Add predictions */
+                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+                /* Update states */
+                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+                psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+                psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+                psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
+                psSS[ 0 ].xq_Q14       = xq_Q14;
+
+                /* Update states for second best quantization */
+
+                /* Quantized excitation */
+                exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+                if ( psDD->Seed < 0 ) {
+                    exc_Q14 = -exc_Q14;
+                }
+
+
+                /* Add predictions */
+                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+                /* Update states */
+                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+                psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+                psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+                psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
+                psSS[ 1 ].xq_Q14       = xq_Q14;
+            }
+        }
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
+
+        /* Find winner */
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+        Winner_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                Winner_ind = k;
+            }
+        }
+
+        /* Increase RD values of expired states */
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+            }
+        }
+
+        /* Find worst in first set and best in second set */
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
+        RDmax_ind = 0;
+        RDmin_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            /* find worst in first set */
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                RDmax_ind = k;
+            }
+            /* find best in second set */
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
+                RDmin_ind = k;
+            }
+        }
+
+        /* Replace a state if best from second set outperforms worst in first set */
+        if( RDmin_Q10 < RDmax_Q10 ) {
+            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+        }
+
+        /* Write samples from winner to output and long-term filter states */
+        psDD = &psDelDec[ Winner_ind ];
+        if( subfr > 0 || i >= decisionDelay ) {
+            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
+        }
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Update states */
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD                                     = &psDelDec[ k ];
+            psSS                                     = &psSampleState[ k ][ 0 ];
+            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
+            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
+            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
+            psDD->RD_Q10                             = psSS->RD_Q10;
+        }
+        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
+    }
+    /* Update LPC states */
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
+        psDD = &psDelDec[ k ];
+        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+)
+{
+    opus_int            i, k, lag;
+    opus_int32          gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+    NSQ_del_dec_struct  *psDD;
+    __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+
+    /* prepare inv_gain_Q23 in packed 4 32-bits */
+    xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23);
+
+    for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) {
+        xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) );
+        /* equal shift right 4 bytes*/
+        xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+        xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 );
+        xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 );
+
+        xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 );
+        xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 );
+
+        xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC );
+
+        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 );
+    }
+
+    for( ; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        {
+            __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1;
+
+            /* prepare gain_adj_Q16 in packed 4 32-bits */
+            xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 );
+
+            for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
+            {
+                xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+                /* equal shift right 4 bytes*/
+                xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+                xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 );
+                xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 );
+
+                xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 );
+                xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 );
+
+                xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
+
+                _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+            }
+
+            for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+                NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+            }
+
+            /* Scale long-term prediction state */
+            if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+                for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
+                    sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+                }
+            }
+
+            for( k = 0; k < nStatesDelayedDecision; k++ ) {
+                psDD = &psDelDec[ k ];
+
+                /* Scale scalar states */
+                psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
+
+                /* Scale short-term prediction and shaping states */
+                for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+                    psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
+                }
+                for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+                    psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
+                }
+                for( i = 0; i < DECISION_DELAY; i++ ) {
+                    psDD->Pred_Q15[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[  i ] );
+                    psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
+                }
+            }
+        }
+    }
+}
diff --git a/media/libopus/silk/x86/NSQ_sse.c b/media/libopus/silk/x86/NSQ_sse.c
new file mode 100644
index 0000000000..72f34fd6fc
--- /dev/null
+++ b/media/libopus/silk/x86/NSQ_sse.c
@@ -0,0 +1,720 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int32          table[][4]              /* I                                    */
+);
+
+void silk_NSQ_sse4_1(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            k, lag, start_idx, LSF_interpolation_flag;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+
+    opus_int32   table[ 64 ][ 4 ];
+    opus_int32   tmp1;
+    opus_int32   q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+
+    SAVE_STACK;
+
+    NSQ->rand_seed = psIndices->Seed;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+
+    /* 0 */
+    q1_Q10  = offset_Q10;
+    q2_Q10  = offset_Q10 + ( 1024 - QUANT_LEVEL_ADJUST_Q10 );
+    rd1_Q20 = q1_Q10 * Lambda_Q10;
+    rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+    table[ 32 ][ 0 ] = q1_Q10;
+    table[ 32 ][ 1 ] = q2_Q10;
+    table[ 32 ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+    table[ 32 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+
+    /* -1 */
+    q1_Q10  = offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 );
+    q2_Q10  = offset_Q10;
+    rd1_Q20 = - q1_Q10 * Lambda_Q10;
+    rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+    table[ 31 ][ 0 ] = q1_Q10;
+    table[ 31 ][ 1 ] = q2_Q10;
+    table[ 31 ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+    table[ 31 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+
+    /* > 0 */
+    for (k = 1; k <= 31; k++)
+    {
+        tmp1 = offset_Q10 + silk_LSHIFT( k, 10 );
+
+        q1_Q10  = tmp1 - QUANT_LEVEL_ADJUST_Q10;
+        q2_Q10  = tmp1 - QUANT_LEVEL_ADJUST_Q10 + 1024;
+        rd1_Q20 = q1_Q10 * Lambda_Q10;
+        rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+        table[ 32 + k ][ 0 ] = q1_Q10;
+        table[ 32 + k ][ 1 ] = q2_Q10;
+        table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+        table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+    }
+
+    /* < -1 */
+    for (k = -32; k <= -2; k++)
+    {
+        tmp1 = offset_Q10 + silk_LSHIFT( k, 10 );
+
+        q1_Q10  = tmp1 + QUANT_LEVEL_ADJUST_Q10;
+        q2_Q10  = tmp1 + QUANT_LEVEL_ADJUST_Q10 + 1024;
+        rd1_Q20 = - q1_Q10 * Lambda_Q10;
+        rd2_Q20 = - q2_Q10 * Lambda_Q10;
+
+        table[ 32 + k ][ 0 ] = q1_Q10;
+        table[ 32 + k ][ 1 ] = q2_Q10;
+        table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+        table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
+
+                NSQ->rewhite_flag = 1;
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+            }
+        }
+
+        silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+
+        if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) )
+        {
+            silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+                AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ],
+                offset_Q10, psEncC->subfr_length, &(table[32]) );
+        }
+        else
+        {
+            silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+                AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+        }
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Update lagPrev for next frame */
+    NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech and noise shaping signals */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/***********************************/
+/* silk_noise_shape_quantizer_10_16  */
+/***********************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int32          table[][4]              /* I                                    */
+)
+{
+    opus_int     i;
+    opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+    opus_int32   n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10;
+    opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+    __m128i xmm_tempa, xmm_tempb;
+
+    __m128i xmm_one;
+
+    __m128i psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF;
+    __m128i psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF;
+    __m128i a_Q12_01234567,        a_Q12_89ABCDEF;
+
+    __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210;
+    __m128i AR_shp_Q13_76543210;
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    /* Set up short term AR state */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+    sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14;
+    xq_Q14         = psLPC_Q14[ 0 ];
+    LTP_pred_Q13   = 0;
+
+    /* load a_Q12 */
+    xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 );
+
+    /* load a_Q12[0] - a_Q12[7] */
+    a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) );
+    /* load a_Q12[ 8 ] - a_Q12[ 15 ] */
+    a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) );
+
+    a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one );
+    a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one );
+
+    /* load AR_shp_Q13 */
+    AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) );
+
+    /* load psLPC_Q14 */
+    xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 );
+
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    psLPC_Q14_hi_01234567 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    /* load sAR2_Q14 */
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    sAR2_Q14_hi_76543210 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    sAR2_Q14_lo_76543210 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    /* prepare 1 in 8 * 16bit */
+    xmm_one = _mm_set1_epi16(1);
+
+    for( i = 0; i < length; i++ )
+    {
+        /* Short-term prediction */
+        __m128i xmm_hi_07, xmm_hi_8F, xmm_lo_07, xmm_lo_8F;
+
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = 8; /* silk_RSHIFT( predictLPCOrder, 1 ); */
+
+        /* shift psLPC_Q14 */
+        psLPC_Q14_hi_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF, 2 );
+        psLPC_Q14_lo_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF, 2 );
+
+        psLPC_Q14_hi_01234567 = _mm_srli_si128( psLPC_Q14_hi_01234567, 2 );
+        psLPC_Q14_lo_01234567 = _mm_srli_si128( psLPC_Q14_lo_01234567, 2 );
+
+        psLPC_Q14_hi_01234567 = _mm_insert_epi16( psLPC_Q14_hi_01234567, (xq_Q14 >> 16), 7 );
+        psLPC_Q14_lo_01234567 = _mm_insert_epi16( psLPC_Q14_lo_01234567, (xq_Q14),       7 );
+
+        /* high part, use pmaddwd, results in 4 32-bit */
+        xmm_hi_07 = _mm_madd_epi16( psLPC_Q14_hi_01234567, a_Q12_01234567 );
+        xmm_hi_8F = _mm_madd_epi16( psLPC_Q14_hi_89ABCDEF, a_Q12_89ABCDEF );
+
+        /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed, _mm_srai_epi16(psLPC_Q14_lo_01234567, 15) */
+        xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_01234567 );
+        xmm_tempb = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_89ABCDEF );
+
+        xmm_tempa = _mm_and_si128( xmm_tempa, a_Q12_01234567 );
+        xmm_tempb = _mm_and_si128( xmm_tempb, a_Q12_89ABCDEF );
+
+        xmm_lo_07 = _mm_mulhi_epi16( psLPC_Q14_lo_01234567, a_Q12_01234567 );
+        xmm_lo_8F = _mm_mulhi_epi16( psLPC_Q14_lo_89ABCDEF, a_Q12_89ABCDEF );
+
+        xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa );
+        xmm_lo_8F = _mm_add_epi16( xmm_lo_8F, xmm_tempb );
+
+        xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one );
+        xmm_lo_8F = _mm_madd_epi16( xmm_lo_8F, xmm_one );
+
+        /* accumulate */
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_hi_8F );
+        xmm_lo_07 = _mm_add_epi32( xmm_lo_07, xmm_lo_8F );
+
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 );
+
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) );
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) );
+
+        LPC_pred_Q10 += _mm_cvtsi128_si32( xmm_hi_07 );
+
+        /* Long-term prediction */
+        if ( opus_likely( signalType == TYPE_VOICED ) ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q13 = 2;
+            {
+                __m128i b_Q14_3210, b_Q14_0123, pred_lag_ptr_0123;
+
+                b_Q14_3210 = OP_CVTEPI16_EPI32_M64( b_Q14 );
+                b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B );
+
+                /* loaded: [0] [-1] [-2] [-3] */
+                pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                /* shuffle to [-3] [-2] [-1] [0] and to new xmm */
+                xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B );
+                /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */
+                xmm_tempa = _mm_mul_epi32( xmm_tempa, b_Q14_3210 );
+                /* right shift 2 bytes (16 bits), zero extended */
+                xmm_tempa = _mm_srli_si128( xmm_tempa, 2 );
+
+                /* a[1] * b[-1], a[3] * b[-3] */
+                pred_lag_ptr_0123 = _mm_mul_epi32( pred_lag_ptr_0123, b_Q14_0123 );
+                pred_lag_ptr_0123 = _mm_srli_si128( pred_lag_ptr_0123, 2 );
+
+                pred_lag_ptr_0123 = _mm_add_epi32( pred_lag_ptr_0123, xmm_tempa );
+                /* equal shift right 8 bytes*/
+                xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+                xmm_tempa = _mm_add_epi32( xmm_tempa, pred_lag_ptr_0123 );
+
+                LTP_pred_Q13 += _mm_cvtsi128_si32( xmm_tempa );
+
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+                pred_lag_ptr++;
+            }
+        }
+
+        /* Noise shape feedback */
+        NSQ->sAR2_Q14[ 9 ] = NSQ->sAR2_Q14[ 8 ];
+        NSQ->sAR2_Q14[ 8 ] = _mm_cvtsi128_si32( _mm_srli_si128(_mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ), 12 ) );
+
+        sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 );
+        sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 );
+
+        sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 );
+        sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14),       0 );
+
+        /* high part, use pmaddwd, results in 4 32-bit */
+        xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 );
+
+        /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed,_mm_srai_epi16(sAR2_Q14_lo_76543210, 15) */
+        xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), sAR2_Q14_lo_76543210 );
+        xmm_tempa = _mm_and_si128( xmm_tempa, AR_shp_Q13_76543210 );
+
+        xmm_lo_07 = _mm_mulhi_epi16( sAR2_Q14_lo_76543210, AR_shp_Q13_76543210 );
+        xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa );
+
+        xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one );
+
+        /* accumulate */
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 );
+
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) );
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) );
+
+        n_AR_Q12 = 5 + _mm_cvtsi128_si32( xmm_hi_07 );
+
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 8 ], AR_shp_Q13[ 8 ] );
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 9 ], AR_shp_Q13[ 9 ] );
+
+        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, sLF_AR_shp_Q14, Tilt_Q14 );
+
+        n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
+        n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 );
+
+        silk_assert( lag > 0 || signalType != TYPE_VOICED );
+
+        /* Combine prediction and noise shaping signals */
+        tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 );        /* Q12 */
+        tmp1 = silk_SUB32( tmp1, n_LF_Q12 );                                    /* Q12 */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
+            shp_lag_ptr++;
+
+            tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 );                       /* Q13 */
+            tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 );                          /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 3 );                                /* Q10 */
+        } else {
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 2 );                                /* Q10 */
+        }
+
+        r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 );                              /* residual error Q10 */
+
+        /* Generate dither */
+        NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+        /* Flip sign depending on dither */
+        tmp2 = -r_Q10;
+        if ( NSQ->rand_seed < 0 ) r_Q10 = tmp2;
+
+        r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+        /* Find two quantization level candidates and measure their rate-distortion */
+        q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+        q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+
+        q1_Q10 = table[q1_Q0][0];
+        q2_Q10 = table[q1_Q0][1];
+
+        if (r_Q10 * table[q1_Q0][2] - table[q1_Q0][3] < 0)
+        {
+            q1_Q10 = q2_Q10;
+        }
+
+        pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 );
+
+        /* Excitation */
+        exc_Q14 = silk_LSHIFT( q1_Q10, 4 );
+
+        tmp2 = -exc_Q14;
+        if ( NSQ->rand_seed < 0 ) exc_Q14 = tmp2;
+
+        /* Add predictions */
+        LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 );
+        xq_Q14      = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 );
+
+        /* Update states */
+        psLPC_Q14++;
+        *psLPC_Q14 = xq_Q14;
+        sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
+
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
+        sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 );
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Make dither dependent on quantized signal */
+        NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
+    }
+
+    NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
+
+    /* Scale XQ back to normal level before saving */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH ];
+
+    /* write back sAR2_Q14 */
+    xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
+    xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
+    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
+    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
+
+    /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */
+    {
+        __m128i xmm_Gain_Q10;
+        __m128i xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, xmm_xq_Q14_7654, xmm_xq_Q14_x7x5;
+
+        /* prepare (1 << 7) in packed 4 32-bits */
+        xmm_tempa = _mm_set1_epi32( (1 << 7) );
+
+        /* prepare Gain_Q10 in packed 4 32-bits */
+        xmm_Gain_Q10 = _mm_set1_epi32( Gain_Q10 );
+
+        /* process xq */
+        for (i = 0; i < length - 7; i += 8)
+        {
+            xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) );
+            xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) );
+
+            /* equal shift right 4 bytes*/
+            xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+            /* equal shift right 4 bytes*/
+            xmm_xq_Q14_x7x5 = _mm_shuffle_epi32( xmm_xq_Q14_7654, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+            xmm_xq_Q14_3210 = _mm_mul_epi32( xmm_xq_Q14_3210, xmm_Gain_Q10 );
+            xmm_xq_Q14_x3x1 = _mm_mul_epi32( xmm_xq_Q14_x3x1, xmm_Gain_Q10 );
+            xmm_xq_Q14_7654 = _mm_mul_epi32( xmm_xq_Q14_7654, xmm_Gain_Q10 );
+            xmm_xq_Q14_x7x5 = _mm_mul_epi32( xmm_xq_Q14_x7x5, xmm_Gain_Q10 );
+
+            xmm_xq_Q14_3210 = _mm_srli_epi64( xmm_xq_Q14_3210, 16 );
+            xmm_xq_Q14_x3x1 = _mm_slli_epi64( xmm_xq_Q14_x3x1, 16 );
+            xmm_xq_Q14_7654 = _mm_srli_epi64( xmm_xq_Q14_7654, 16 );
+            xmm_xq_Q14_x7x5 = _mm_slli_epi64( xmm_xq_Q14_x7x5, 16 );
+
+            xmm_xq_Q14_3210 = _mm_blend_epi16( xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, 0xCC );
+            xmm_xq_Q14_7654 = _mm_blend_epi16( xmm_xq_Q14_7654, xmm_xq_Q14_x7x5, 0xCC );
+
+            /* silk_RSHIFT_ROUND(xq, 8) */
+            xmm_xq_Q14_3210 = _mm_add_epi32( xmm_xq_Q14_3210, xmm_tempa );
+            xmm_xq_Q14_7654 = _mm_add_epi32( xmm_xq_Q14_7654, xmm_tempa );
+
+            xmm_xq_Q14_3210 = _mm_srai_epi32( xmm_xq_Q14_3210, 8 );
+            xmm_xq_Q14_7654 = _mm_srai_epi32( xmm_xq_Q14_7654, 8 );
+
+            /* silk_SAT16 */
+            xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 );
+
+            /* save to xq */
+            _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 );
+        }
+    }
+    for ( ; i < length; i++)
+    {
+        xq[i] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) );
+    }
+
+    /* Update LPC synth buffer */
+    silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+}
+
+static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+)
+{
+    opus_int   i, lag;
+    opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+    __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+
+    /* prepare inv_gain_Q23 in packed 4 32-bits */
+    xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23);
+
+    for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) {
+        xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) );
+
+        /* equal shift right 4 bytes*/
+        xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+        xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 );
+        xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 );
+
+        xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 );
+        xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 );
+
+        xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC );
+
+        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 );
+    }
+
+    for( ; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1;
+
+        /* prepare gain_adj_Q16 in packed 4 32-bits */
+        xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16);
+
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
+        {
+            xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+            /* equal shift right 4 bytes*/
+            xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 );
+            xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 );
+            xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
+
+            _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+        }
+
+        for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
+
+        /* Scale short-term prediction and shaping states */
+        for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+            NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] );
+        }
+        for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+            NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
+        }
+    }
+}
diff --git a/media/libopus/silk/x86/SigProc_FIX_sse.h b/media/libopus/silk/x86/SigProc_FIX_sse.h
new file mode 100644
index 0000000000..61efa8da41
--- /dev/null
+++ b/media/libopus/silk/x86/SigProc_FIX_sse.h
@@ -0,0 +1,94 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SIGPROC_FIX_SSE_H
+#define SIGPROC_FIX_SSE_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+void silk_burg_modified_sse4_1(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
+    ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+
+#else
+
+extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */);
+
+#  define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
+    ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+
+#endif
+
+opus_int64 silk_inner_prod16_aligned_64_sse4_1(
+    const opus_int16 *inVec1,
+    const opus_int16 *inVec2,
+    const opus_int   len
+);
+
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+
+#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
+    ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len))
+
+#else
+
+extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_int16 *inVec1,
+                    const opus_int16 *inVec2,
+                    const opus_int   len);
+
+#  define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
+    ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
+
+#endif
+#endif
+#endif
diff --git a/media/libopus/silk/x86/VAD_sse.c b/media/libopus/silk/x86/VAD_sse.c
new file mode 100644
index 0000000000..4e90f4410d
--- /dev/null
+++ b/media/libopus/silk/x86/VAD_sse.c
@@ -0,0 +1,277 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/* Weighting factors for tilt measure */
+static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
+
+/***************************************/
+/* Get the speech activity level in Q8 */
+/***************************************/
+opus_int silk_VAD_GetSA_Q8_sse4_1(                  /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,            /* I/O  Encoder state                               */
+    const opus_int16            pIn[]               /* I    PCM input                                   */
+)
+{
+    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
+    opus_int   decimated_framelength1, decimated_framelength2;
+    opus_int   decimated_framelength;
+    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+    opus_int32 sumSquared, smooth_coef_Q16;
+    opus_int16 HPstateTmp;
+    VARDECL( opus_int16, X );
+    opus_int32 Xnrg[ VAD_N_BANDS ];
+    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
+    opus_int32 speech_nrg, x_tmp;
+    opus_int   X_offset[ VAD_N_BANDS ];
+    opus_int   ret = 0;
+    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+
+    SAVE_STACK;
+
+    /* Safety checks */
+    silk_assert( VAD_N_BANDS == 4 );
+    silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+    silk_assert( psEncC->frame_length <= 512 );
+    silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+
+    /***********************/
+    /* Filter and Decimate */
+    /***********************/
+    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+    /* Decimate into 4 bands:
+       0       L      3L       L              3L                             5L
+               -      --       -              --                             --
+               8       8       2               4                              4
+
+       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
+
+       They're arranged to allow the minimal ( frame_length / 4 ) extra
+       scratch space during the downsampling process */
+    X_offset[ 0 ] = 0;
+    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
+    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
+    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
+        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
+
+    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
+
+    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
+
+    /*********************************************/
+    /* HP filter on lowest band (differentiator) */
+    /*********************************************/
+    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+    HPstateTmp = X[ decimated_framelength - 1 ];
+    for( i = decimated_framelength - 1; i > 0; i-- ) {
+        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
+        X[ i ]     -= X[ i - 1 ];
+    }
+    X[ 0 ] -= psSilk_VAD->HPstate;
+    psSilk_VAD->HPstate = HPstateTmp;
+
+    /*************************************/
+    /* Calculate the energy in each band */
+    /*************************************/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Find the decimated framelength in the non-uniformly divided bands */
+        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
+
+        /* Split length into subframe lengths */
+        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
+        dec_subframe_offset = 0;
+
+        /* Compute energy per sub-frame */
+        /* initialize with summed energy of last subframe */
+        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
+        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
+            __m128i xmm_X, xmm_acc;
+            sumSquared = 0;
+
+            xmm_acc = _mm_setzero_si128();
+
+            for( i = 0; i < dec_subframe_length - 7; i += 8 )
+            {
+                xmm_X   = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
+                xmm_X   = _mm_srai_epi16( xmm_X, 3 );
+                xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );
+                xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
+            }
+
+            xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) );
+            xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) );
+
+            sumSquared += _mm_cvtsi128_si32( xmm_acc );
+
+            for( ; i < dec_subframe_length; i++ ) {
+                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
+                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
+                x_tmp = silk_RSHIFT(
+                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
+                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
+
+                /* Safety check */
+                silk_assert( sumSquared >= 0 );
+            }
+
+            /* Add/saturate summed energy of current subframe */
+            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
+            } else {
+                /* Look-ahead subframe */
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
+            }
+
+            dec_subframe_offset += dec_subframe_length;
+        }
+        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
+    }
+
+    /********************/
+    /* Noise estimation */
+    /********************/
+    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
+
+    /***********************************************/
+    /* Signal-plus-noise to noise ratio estimation */
+    /***********************************************/
+    sumSquared = 0;
+    input_tilt = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
+        if( speech_nrg > 0 ) {
+            /* Divide, with sufficient resolution */
+            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
+            } else {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
+            }
+
+            /* Convert to log domain */
+            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
+
+            /* Sum-of-squares */
+            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
+
+            /* Tilt measure */
+            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
+                /* Scale down SNR value for small subband speech energies */
+                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
+            }
+            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
+        } else {
+            NrgToNoiseRatio_Q8[ b ] = 256;
+        }
+    }
+
+    /* Mean-of-squares */
+    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
+
+    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
+    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
+
+    /*********************************/
+    /* Speech Probability Estimation */
+    /*********************************/
+    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
+
+    /**************************/
+    /* Frequency Tilt Measure */
+    /**************************/
+    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
+
+    /**************************************************/
+    /* Scale the sigmoid output based on power levels */
+    /**************************************************/
+    speech_nrg = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
+        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
+    }
+
+    /* Power scaling */
+    if( speech_nrg <= 0 ) {
+        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
+    } else if( speech_nrg < 32768 ) {
+        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
+        } else {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
+        }
+
+        /* square-root */
+        speech_nrg = silk_SQRT_APPROX( speech_nrg );
+        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
+    }
+
+    /* Copy the resulting speech activity in Q8 */
+    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
+
+    /***********************************/
+    /* Energy Level and SNR estimation */
+    /***********************************/
+    /* Smoothing coefficient */
+    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
+
+    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+        smooth_coef_Q16 >>= 1;
+    }
+
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* compute smoothed energy-to-noise ratio per band */
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
+            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
+
+        /* signal to noise ratio in dB per band */
+        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
+        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
+        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
+    }
+
+    RESTORE_STACK;
+    return( ret );
+}
diff --git a/media/libopus/silk/x86/VQ_WMat_EC_sse.c b/media/libopus/silk/x86/VQ_WMat_EC_sse.c
new file mode 100644
index 0000000000..74d6c6d0ec
--- /dev/null
+++ b/media/libopus/silk/x86/VQ_WMat_EC_sse.c
@@ -0,0 +1,142 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+
+/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
+void silk_VQ_WMat_EC_sse4_1(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+)
+{
+    opus_int   k, gain_tmp_Q7;
+    const opus_int8 *cb_row_Q7;
+    opus_int16 diff_Q14[ 5 ];
+    opus_int32 sum1_Q14, sum2_Q16;
+
+    __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5;
+    /* Loop over codebook */
+    *rate_dist_Q14 = silk_int32_MAX;
+    cb_row_Q7 = cb_Q7;
+    for( k = 0; k < L; k++ ) {
+        gain_tmp_Q7 = cb_gain_Q7[k];
+
+        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
+
+        C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] );
+        C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] );
+        C_tmp2 = _mm_slli_epi32( C_tmp2, 7 );
+        C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 );
+
+        diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 );
+        diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 );
+        diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 );
+        diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 );
+
+        /* Weighted rate */
+        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
+
+        /* Penalty for too large gain */
+        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* first row of W_Q18 */
+        C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) );
+        C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 );
+        C_tmp4 = _mm_srli_si128( C_tmp4, 2 );
+
+        C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */
+        C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */
+
+        C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 );
+        C_tmp5 = _mm_srli_si128( C_tmp5, 2 );
+
+        C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 );
+        C_tmp5 = _mm_slli_epi32( C_tmp5, 1 );
+
+        C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
+        sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 );
+
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  0 ], diff_Q14[ 0 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 0 ] );
+
+        /* second row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  7 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  8 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  9 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  6 ], diff_Q14[ 1 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 1 ] );
+
+        /* third row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 13 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );
+
+        /* fourth row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 19 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );
+
+        /* last row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 24 ], diff_Q14[ 4 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* find best */
+        if( sum1_Q14 < *rate_dist_Q14 ) {
+            *rate_dist_Q14 = sum1_Q14;
+            *ind = (opus_int8)k;
+            *gain_Q7 = gain_tmp_Q7;
+        }
+
+        /* Go to next cbk vector */
+        cb_row_Q7 += LTP_ORDER;
+    }
+}
diff --git a/media/libopus/silk/x86/main_sse.h b/media/libopus/silk/x86/main_sse.h
new file mode 100644
index 0000000000..afd5ec26e1
--- /dev/null
+++ b/media/libopus/silk/x86/main_sse.h
@@ -0,0 +1,276 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MAIN_SSE_H
+#define MAIN_SSE_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+
+#  define OVERRIDE_silk_VQ_WMat_EC
+
+void silk_VQ_WMat_EC_sse4_1(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+);
+
+#if defined OPUS_X86_PRESUME_SSE4_1
+
+#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L, arch) \
+    ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L))
+
+#else
+
+extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+);
+
+#  define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L, arch) \
+    ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
+                          mu_Q9, max_gain_Q7, L))
+
+#endif
+
+#  define OVERRIDE_silk_NSQ
+
+void silk_NSQ_sse4_1(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+#if defined OPUS_X86_PRESUME_SSE4_1
+
+#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+
+#else
+
+extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+#  define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+
+#endif
+
+#  define OVERRIDE_silk_NSQ_del_dec
+
+void silk_NSQ_del_dec_sse4_1(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+#if defined OPUS_X86_PRESUME_SSE4_1
+
+#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+
+#else
+
+extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+#  define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+    ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
+                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+
+#endif
+
+void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+);
+
+/**************************/
+/* Noise level estimation */
+/**************************/
+void silk_VAD_GetNoiseLevels(
+    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
+    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
+);
+
+#  define OVERRIDE_silk_VAD_GetSA_Q8
+
+opus_int silk_VAD_GetSA_Q8_sse4_1(
+    silk_encoder_state *psEnC,
+    const opus_int16   pIn[]
+);
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
+
+#else
+
+#  define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
+     ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
+
+extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
+     silk_encoder_state *psEnC,
+     const opus_int16   pIn[]);
+
+#  define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
+
+#endif
+
+void silk_warped_LPC_analysis_filter_FIX_sse4_1(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+);
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
+    ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
+
+#else
+
+extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+);
+
+#  define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
+    ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
+
+#endif
+
+# endif
+#endif
diff --git a/media/libopus/silk/x86/x86_silk_map.c b/media/libopus/silk/x86/x86_silk_map.c
new file mode 100644
index 0000000000..818841f2c1
--- /dev/null
+++ b/media/libopus/silk/x86/x86_silk_map.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+#include "config.h"
+#endif
+
+#include "celt/x86/x86cpu.h"
+#include "structs.h"
+#include "SigProc_FIX.h"
+#include "pitch.h"
+#include "main.h"
+
+#if !defined(OPUS_X86_PRESUME_SSE4_1)
+
+#if defined(FIXED_POINT)
+
+#include "fixed/main_FIX.h"
+
+opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    const opus_int16 *inVec1,
+    const opus_int16 *inVec2,
+    const opus_int   len
+) = {
+  silk_inner_prod16_aligned_64_c,                  /* non-sse */
+  silk_inner_prod16_aligned_64_c,
+  silk_inner_prod16_aligned_64_c,
+  MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 )  /* avx */
+};
+
+#endif
+
+opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    silk_encoder_state *psEncC,
+    const opus_int16   pIn[]
+) = {
+  silk_VAD_GetSA_Q8_c,                  /* non-sse */
+  silk_VAD_GetSA_Q8_c,
+  silk_VAD_GetSA_Q8_c,
+  MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 )  /* avx */
+};
+
+void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+) = {
+  silk_NSQ_c,                  /* non-sse */
+  silk_NSQ_c,
+  silk_NSQ_c,
+  MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_NSQ )  /* avx */
+};
+
+void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+) = {
+  silk_VQ_WMat_EC_c,                  /* non-sse */
+  silk_VQ_WMat_EC_c,
+  silk_VQ_WMat_EC_c,
+  MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_VQ_WMat_EC )  /* avx */
+};
+
+void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+) = {
+  silk_NSQ_del_dec_c,                  /* non-sse */
+  silk_NSQ_del_dec_c,
+  silk_NSQ_del_dec_c,
+  MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_NSQ_del_dec )  /* avx */
+};
+
+#if defined(FIXED_POINT)
+
+void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    opus_int32                  state[],                    /* I/O  State [order + 1]                   */
+    opus_int32                  res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+) = {
+  silk_warped_LPC_analysis_filter_FIX_c,                  /* non-sse */
+  silk_warped_LPC_analysis_filter_FIX_c,
+  silk_warped_LPC_analysis_filter_FIX_c,
+  MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX )  /* avx */
+};
+
+void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+) = {
+  silk_burg_modified_c,                  /* non-sse */
+  silk_burg_modified_c,
+  silk_burg_modified_c,
+  MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */
+  MAY_HAVE_SSE4_1( silk_burg_modified )  /* avx */
+};
+
+#endif
+#endif
diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild
index 4081418221..dfc2c62e83 100644
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -18,11 +18,28 @@ celt_sources = [
     'celt/vq.c',
 ]
 
-celt_nonunified_sources = [
+opus_nonunified_sources = [
     # Disabled because of name clash of opus_custom_encoder_get_size.
     'celt/celt_decoder.c',
-    # Disabled because of name clash of opus_custom_encoder_get_size.
     'celt/celt_encoder.c',
+    # Disabled for (safe) warning about QA redefinition.
+    'silk/LPC_inv_pred_gain.c',
+    'silk/NLSF2A.c',
+]
+
+celt_sources_sse = [
+    'celt/x86/pitch_sse.c',
+    'celt/x86/x86_celt_map.c',
+    'celt/x86/x86cpu.c',
+]
+
+celt_sources_sse2 = [
+    'celt/x86/pitch_sse2.c',
+]
+
+celt_sources_sse4_1 = [
+    'celt/x86/celt_lpc_sse.c',
+    'celt/x86/pitch_sse4_1.c',
 ]
 
 celt_sources_arm = [
@@ -38,6 +55,13 @@ celt_am_sources_arm_asm = [
     'celt/arm/armopts.s.in',
 ]
 
+celt_sources_arm_neon_intr = [
+    'celt/arm/celt_ne10_fft.c',
+    'celt/arm/celt_ne10_mdct.c',
+    'celt/arm/celt_neon_intr.c',
+    'CELT_SOURCES_ARM_NE10=',
+]
+
 opus_sources = [
     'src/opus.c',
     'src/opus_decoder.c',
@@ -88,8 +112,6 @@ silk_sources = [
     'silk/log2lin.c',
     'silk/LP_variable_cutoff.c',
     'silk/LPC_analysis_filter.c',
-    'silk/LPC_inv_pred_gain.c',
-    'silk/NLSF2A.c',
     'silk/NLSF_decode.c',
     'silk/NLSF_del_dec_quant.c',
     'silk/NLSF_encode.c',
@@ -133,6 +155,14 @@ silk_sources = [
     'silk/VQ_WMat_EC.c',
 ]
 
+silk_sources_sse4_1 = [
+    'silk/x86/NSQ_del_dec_sse.c',
+    'silk/x86/NSQ_sse.c',
+    'silk/x86/VAD_sse.c',
+    'silk/x86/VQ_WMat_EC_sse.c',
+    'silk/x86/x86_silk_map.c',
+]
+
 silk_sources_fixed = [
     'silk/fixed/apply_sine_window_FIX.c',
     'silk/fixed/autocorr_FIX.c',
@@ -161,6 +191,12 @@ silk_sources_fixed = [
     'silk/fixed/warped_autocorrelation_FIX.c',
 ]
 
+silk_sources_fixed_sse4_1 = [
+    'silk/fixed/x86/burg_modified_FIX_sse.c',
+    'silk/fixed/x86/prefilter_FIX_sse.c',
+    'silk/fixed/x86/vector_ops_FIX_sse.c',
+]
+
 silk_sources_float = [
     'silk/float/apply_sine_window_FLP.c',
     'silk/float/autocorrelation_FLP.c',
diff --git a/media/libopus/src/analysis.c b/media/libopus/src/analysis.c
index 778a62aabf..360ebcc8dd 100644
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -39,8 +39,6 @@
 #include "mlp.h"
 #include "stack_alloc.h"
 
-extern const MLP net;
-
 #ifndef M_PI
 #define M_PI 3.141592653
 #endif
@@ -140,6 +138,21 @@ static OPUS_INLINE float fast_atan2f(float y, float x) {
    }
 }
 
+void tonality_analysis_init(TonalityAnalysisState *tonal)
+{
+  /* Initialize reusable fields. */
+  tonal->arch = opus_select_arch();
+  /* Clear remaining fields. */
+  tonality_analysis_reset(tonal);
+}
+
+void tonality_analysis_reset(TonalityAnalysisState *tonal)
+{
+  /* Clear non-reusable fields. */
+  char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
+  OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
+}
+
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
    int pos;
@@ -189,7 +202,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    info_out->music_prob = psum;
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
     const kiss_fft_state *kfft;
@@ -262,7 +275,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
-    opus_fft(kfft, in, out);
+    opus_fft(kfft, in, out, tonal->arch);
+#ifndef FIXED_POINT
+    /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
+    if (celt_isnan(out[0].r))
+    {
+       info->valid = 0;
+       RESTORE_STACK;
+       return;
+    }
+#endif
 
     for (i=1;i<N2;i++)
     {
@@ -334,6 +356,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
           tE += binE*tonality[i];
           nE += binE*2.f*(.5f-noisiness[i]);
        }
+#ifndef FIXED_POINT
+       /* Check for extreme band energies that could cause NaNs later. */
+       if (!(E<1e9f) || celt_isnan(E))
+       {
+          info->valid = 0;
+          RESTORE_STACK;
+          return;
+       }
+#endif
+
        tonal->E[tonal->E_count][b] = E;
        frame_noisiness += nE/(1e-15f+E);
 
@@ -611,8 +643,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
     info->noisiness = frame_noisiness;
     info->valid = 1;
-    if (info_out!=NULL)
-       OPUS_COPY(info_out, info, 1);
     RESTORE_STACK;
 }
 
@@ -631,7 +661,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
       pcm_len = analysis_frame_size - analysis->analysis_offset;
       offset = analysis->analysis_offset;
       do {
-         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
          offset += 480;
          pcm_len -= 480;
       } while (pcm_len>0);
diff --git a/media/libopus/src/analysis.h b/media/libopus/src/analysis.h
index be0388faa3..9eae56a525 100644
--- a/media/libopus/src/analysis.h
+++ b/media/libopus/src/analysis.h
@@ -39,6 +39,8 @@
 #define DETECT_SIZE 200
 
 typedef struct {
+   int arch;
+#define TONALITY_ANALYSIS_RESET_START angle
    float angle[240];
    float d_angle[240];
    float d2_angle[240];
@@ -78,8 +80,19 @@ typedef struct {
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
+/** Initialize a TonalityAnalysisState struct.
+ *
+ * This performs some possibly slow initialization steps which should
+ * not be repeated every analysis step. No allocated memory is retained
+ * by the state struct, so no cleanup call is required.
+ */
+void tonality_analysis_init(TonalityAnalysisState *analysis);
+
+/** Reset a TonalityAnalysisState stuct.
+ *
+ * Call this when there's a discontinuity in the data.
+ */
+void tonality_analysis_reset(TonalityAnalysisState *analysis);
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
 
diff --git a/media/libopus/src/mlp.c b/media/libopus/src/mlp.c
index 4638602667..ff9e50df47 100644
--- a/media/libopus/src/mlp.c
+++ b/media/libopus/src/mlp.c
@@ -41,77 +41,82 @@
 #if 0
 static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 {
-	int i;
-	opus_val16 xx; /* Q11 */
-	/*double x, y;*/
-	opus_val16 dy, yy; /* Q14 */
-	/*x = 1.9073e-06*_x;*/
-	if (_x>=QCONST32(8,19))
-		return QCONST32(1.,14);
-	if (_x<=-QCONST32(8,19))
-		return -QCONST32(1.,14);
-	xx = EXTRACT16(SHR32(_x, 8));
-	/*i = lrint(25*x);*/
-	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
-	/*x -= .04*i;*/
-	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
-	/*x = xx*(1./2048);*/
-	/*y = tansig_table[250+i];*/
-	yy = tansig_table[250+i];
-	/*y = yy*(1./16384);*/
-	dy = 16384-MULT16_16_Q14(yy,yy);
-	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
-	return yy;
+    int i;
+    opus_val16 xx; /* Q11 */
+    /*double x, y;*/
+    opus_val16 dy, yy; /* Q14 */
+    /*x = 1.9073e-06*_x;*/
+    if (_x>=QCONST32(8,19))
+        return QCONST32(1.,14);
+    if (_x<=-QCONST32(8,19))
+        return -QCONST32(1.,14);
+    xx = EXTRACT16(SHR32(_x, 8));
+    /*i = lrint(25*x);*/
+    i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+    /*x -= .04*i;*/
+    xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+    /*x = xx*(1./2048);*/
+    /*y = tansig_table[250+i];*/
+    yy = tansig_table[250+i];
+    /*y = yy*(1./16384);*/
+    dy = 16384-MULT16_16_Q14(yy,yy);
+    yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+    return yy;
 }
 #else
 /*extern const float tansig_table[501];*/
 static OPUS_INLINE float tansig_approx(float x)
 {
-	int i;
-	float y, dy;
-	float sign=1;
-	/* Tests are reversed to catch NaNs */
+    int i;
+    float y, dy;
+    float sign=1;
+    /* Tests are reversed to catch NaNs */
     if (!(x<8))
         return 1;
     if (!(x>-8))
         return -1;
-	if (x<0)
-	{
-	   x=-x;
-	   sign=-1;
-	}
-	i = (int)floor(.5f+25*x);
-	x -= .04f*i;
-	y = tansig_table[i];
-	dy = 1-y*y;
-	y = y + x*dy*(1 - y*x);
-	return sign*y;
+#ifndef FIXED_POINT
+    /* Another check in case of -ffast-math */
+    if (celt_isnan(x))
+       return 0;
+#endif
+    if (x<0)
+    {
+       x=-x;
+       sign=-1;
+    }
+    i = (int)floor(.5f+25*x);
+    x -= .04f*i;
+    y = tansig_table[i];
+    dy = 1-y*y;
+    y = y + x*dy*(1 - y*x);
+    return sign*y;
 }
 #endif
 
 #if 0
 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 {
-	int j;
-	opus_val16 hidden[MAX_NEURONS];
-	const opus_val16 *W = m->weights;
-	/* Copy to tmp_in */
-	for (j=0;j<m->topo[1];j++)
-	{
-		int k;
-		opus_val32 sum = SHL32(EXTEND32(*W++),8);
-		for (k=0;k<m->topo[0];k++)
-			sum = MAC16_16(sum, in[k],*W++);
-		hidden[j] = tansig_approx(sum);
-	}
-	for (j=0;j<m->topo[2];j++)
-	{
-		int k;
-		opus_val32 sum = SHL32(EXTEND32(*W++),14);
-		for (k=0;k<m->topo[1];k++)
-			sum = MAC16_16(sum, hidden[k], *W++);
-		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
-	}
+    int j;
+    opus_val16 hidden[MAX_NEURONS];
+    const opus_val16 *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        opus_val32 sum = SHL32(EXTEND32(*W++),8);
+        for (k=0;k<m->topo[0];k++)
+            sum = MAC16_16(sum, in[k],*W++);
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        opus_val32 sum = SHL32(EXTEND32(*W++),14);
+        for (k=0;k<m->topo[1];k++)
+            sum = MAC16_16(sum, hidden[k], *W++);
+        out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+    }
 }
 #else
 void mlp_process(const MLP *m, const float *in, float *out)
diff --git a/media/libopus/src/mlp.h b/media/libopus/src/mlp.h
index 86c8e0617d..618e246e2c 100644
--- a/media/libopus/src/mlp.h
+++ b/media/libopus/src/mlp.h
@@ -31,11 +31,13 @@
 #include "arch.h"
 
 typedef struct {
-	int layers;
-	const int *topo;
-	const float *weights;
+    int layers;
+    const int *topo;
+    const float *weights;
 } MLP;
 
+extern const MLP net;
+
 void mlp_process(const MLP *m, const float *in, float *out);
 
 #endif /* _MLP_H_ */
diff --git a/media/libopus/src/mlp_data.c b/media/libopus/src/mlp_data.c
index 401c4c0250..c2fda4e2e5 100644
--- a/media/libopus/src/mlp_data.c
+++ b/media/libopus/src/mlp_data.c
@@ -1,6 +1,10 @@
 /* The contents of this file was automatically generated by mlp_train.c
    It contains multi-layer perceptron (MLP) weights. */
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include "mlp.h"
 
 /* RMS error was 0.138320, seed was 1361535663 */
diff --git a/media/libopus/src/opus.c b/media/libopus/src/opus.c
index 30890b9cbf..e9ce93b308 100644
--- a/media/libopus/src/opus.c
+++ b/media/libopus/src/opus.c
@@ -166,6 +166,27 @@ static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *siz
    }
 }
 
+int opus_packet_get_samples_per_frame(const unsigned char *data,
+      opus_int32 Fs)
+{
+   int audiosize;
+   if (data[0]&0x80)
+   {
+      audiosize = ((data[0]>>3)&0x3);
+      audiosize = (Fs<<audiosize)/400;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
+   } else {
+      audiosize = ((data[0]>>3)&0x3);
+      if (audiosize == 3)
+         audiosize = Fs*60/1000;
+      else
+         audiosize = (Fs<<audiosize)/100;
+   }
+   return audiosize;
+}
+
 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
       int self_delimited, unsigned char *out_toc,
       const unsigned char *frames[48], opus_int16 size[48],
diff --git a/media/libopus/src/opus_decoder.c b/media/libopus/src/opus_decoder.c
index 919ba521b5..080bec5072 100644
--- a/media/libopus/src/opus_decoder.c
+++ b/media/libopus/src/opus_decoder.c
@@ -33,7 +33,7 @@
 # error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details."
 #endif
 
-#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW)
 # pragma message "You appear to be compiling without optimization, if so opus will be very slow."
 #endif
 
@@ -59,6 +59,7 @@ struct OpusDecoder {
    opus_int32   Fs;          /** Sampling rate (at the API level) */
    silk_DecControlStruct DecControl;
    int          decode_gain;
+   int          arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define OPUS_DECODER_RESET_START stream_channels
@@ -77,12 +78,6 @@ struct OpusDecoder {
    opus_uint32  rangeFinal;
 };
 
-#ifdef FIXED_POINT
-static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
-   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
-}
-#endif
-
 
 int opus_decoder_get_size(int channels)
 {
@@ -137,6 +132,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
 
    st->prev_mode = 0;
    st->frame_size = Fs/400;
+   st->arch = opus_select_arch();
    return OPUS_OK;
 }
 
@@ -215,7 +211,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    VARDECL(opus_val16, pcm_transition_silk);
    int pcm_transition_celt_size;
    VARDECL(opus_val16, pcm_transition_celt);
-   opus_val16 *pcm_transition;
+   opus_val16 *pcm_transition=NULL;
    int redundant_audio_size;
    VARDECL(opus_val16, redundant_audio);
 
@@ -230,6 +226,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    int F2_5, F5, F10, F20;
    const opus_val16 *window;
    opus_uint32 redundant_rng = 0;
+   int celt_accum;
    ALLOC_STACK;
 
    silk_dec = (char*)st+st->silk_dec_offset;
@@ -295,6 +292,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       }
    }
 
+   /* In fixed-point, we can tell CELT to do the accumulation on top of the
+      SILK PCM buffer. This saves some stack space. */
+#ifdef FIXED_POINT
+   celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);
+#else
+   celt_accum = 0;
+#endif
+
    pcm_transition_silk_size = ALLOC_NONE;
    pcm_transition_celt_size = ALLOC_NONE;
    if (data!=NULL && st->prev_mode > 0 && (
@@ -325,14 +330,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    }
 
    /* Don't allocate any memory when in CELT-only mode */
-   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+   pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
    ALLOC(pcm_silk, pcm_silk_size, opus_int16);
 
    /* SILK processing */
    if (mode != MODE_CELT_ONLY)
    {
       int lost_flag, decoded_samples;
-      opus_int16 *pcm_ptr = pcm_silk;
+      opus_int16 *pcm_ptr;
+#ifdef FIXED_POINT
+      if (celt_accum)
+         pcm_ptr = pcm;
+      else
+#endif
+         pcm_ptr = pcm_silk;
 
       if (st->prev_mode==MODE_CELT_ONLY)
          silk_InitDecoder( silk_dec );
@@ -366,7 +377,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
         /* Call SILK decoder */
         int first_frame = decoded_samples == 0;
         silk_ret = silk_Decode( silk_dec, &st->DecControl,
-                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
+                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch );
         if( silk_ret ) {
            if (lost_flag) {
               /* PLC failure should not be fatal */
@@ -462,7 +473,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    {
       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
       celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
-                          redundant_audio, F5, NULL);
+                          redundant_audio, F5, NULL, 0);
       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
    }
 
@@ -477,25 +488,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
          celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
       /* Decode CELT */
       celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
-                                     len, pcm, celt_frame_size, &dec);
+                                     len, pcm, celt_frame_size, &dec, celt_accum);
    } else {
       unsigned char silence[2] = {0xFF, 0xFF};
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = 0;
+      if (!celt_accum)
+      {
+         for (i=0;i<frame_size*st->channels;i++)
+            pcm[i] = 0;
+      }
       /* For hybrid -> SILK transitions, we let the CELT MDCT
          do a fade-out by decoding a silence frame */
       if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
       {
          celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
-         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum);
       }
    }
 
-   if (mode != MODE_CELT_ONLY)
+   if (mode != MODE_CELT_ONLY && !celt_accum)
    {
 #ifdef FIXED_POINT
       for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+         pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));
 #else
       for (i=0;i<frame_size*st->channels;i++)
          pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
@@ -514,7 +528,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
 
-      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0);
       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
       smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
                   pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
@@ -710,6 +724,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
 {
    VARDECL(opus_int16, out);
    int ret, i;
+   int nb_samples;
    ALLOC_STACK;
 
    if(frame_size<=0)
@@ -717,6 +732,14 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples>0)
+         frame_size = IMIN(frame_size, nb_samples);
+      else
+         return OPUS_INVALID_PACKET;
+   }
    ALLOC(out, frame_size*st->channels, opus_int16);
 
    ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
@@ -737,6 +760,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
 {
    VARDECL(float, out);
    int ret, i;
+   int nb_samples;
    ALLOC_STACK;
 
    if(frame_size<=0)
@@ -745,6 +769,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
       return OPUS_BAD_ARG;
    }
 
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples>0)
+         frame_size = IMIN(frame_size, nb_samples);
+      else
+         return OPUS_INVALID_PACKET;
+   }
    ALLOC(out, frame_size*st->channels, float);
 
    ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
@@ -904,27 +936,6 @@ int opus_packet_get_bandwidth(const unsigned char *data)
    return bandwidth;
 }
 
-int opus_packet_get_samples_per_frame(const unsigned char *data,
-      opus_int32 Fs)
-{
-   int audiosize;
-   if (data[0]&0x80)
-   {
-      audiosize = ((data[0]>>3)&0x3);
-      audiosize = (Fs<<audiosize)/400;
-   } else if ((data[0]&0x60) == 0x60)
-   {
-      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
-   } else {
-      audiosize = ((data[0]>>3)&0x3);
-      if (audiosize == 3)
-         audiosize = Fs*60/1000;
-      else
-         audiosize = (Fs<<audiosize)/100;
-   }
-   return audiosize;
-}
-
 int opus_packet_get_nb_channels(const unsigned char *data)
 {
    return (data[0]&0x4) ? 2 : 1;
diff --git a/media/libopus/src/opus_encoder.c b/media/libopus/src/opus_encoder.c
index fbd3de6392..a7e19127d6 100644
--- a/media/libopus/src/opus_encoder.c
+++ b/media/libopus/src/opus_encoder.c
@@ -38,6 +38,7 @@
 #include "float_cast.h"
 #include "opus.h"
 #include "arch.h"
+#include "pitch.h"
 #include "opus_private.h"
 #include "os_support.h"
 #include "cpu_support.h"
@@ -80,6 +81,10 @@ struct OpusEncoder {
     int          lsb_depth;
     int          encoder_buffer;
     int          lfe;
+    int          arch;
+#ifndef DISABLE_FLOAT_API
+    TonalityAnalysisState analysis;
+#endif
 
 #define OPUS_ENCODER_RESET_START stream_channels
     int          stream_channels;
@@ -99,12 +104,9 @@ struct OpusEncoder {
     StereoWidthState width_mem;
     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
 #ifndef DISABLE_FLOAT_API
-    TonalityAnalysisState analysis;
     int          detected_bandwidth;
-    int          analysis_offset;
 #endif
     opus_uint32  rangeFinal;
-    int          arch;
 };
 
 /* Transition tables for the voice and music. First column is the
@@ -231,7 +233,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->lsb_depth = 24;
     st->variable_duration = OPUS_FRAMESIZE_ARG;
 
-    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 
+    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
        + 1.5 ms for SILK resamplers and stereo prediction) */
     st->delay_compensation = st->Fs/250;
 
@@ -242,6 +244,10 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->mode = MODE_HYBRID;
     st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
 
+#ifndef DISABLE_FLOAT_API
+    tonality_analysis_init(&st->analysis);
+#endif
+
     return OPUS_OK;
 }
 
@@ -648,7 +654,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_
    return best_state;
 }
 
-int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs,
                 int bitrate, opus_val16 tonality, float *mem, int buffering,
                 downmix_func downmix)
 {
@@ -660,6 +666,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
    int bestLM=0;
    int subframe;
    int pos;
+   int offset;
    VARDECL(opus_val32, sub);
 
    subframe = Fs/400;
@@ -670,9 +677,8 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
    {
       /* Consider the CELT delay when not in restricted-lowdelay */
       /* We assume the buffering is between 2.5 and 5 ms */
-      int offset = 2*subframe - buffering;
+      offset = 2*subframe - buffering;
       celt_assert(offset>=0 && offset <= subframe);
-      x += C*offset;
       len -= offset;
       e[1]=mem[1];
       e_1[1]=1.f/(EPSILON+mem[1]);
@@ -681,6 +687,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
       pos = 3;
    } else {
       pos=1;
+      offset=0;
    }
    N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
    /* Just silencing a warning, it's really initialized later */
@@ -692,7 +699,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
       int j;
       tmp=EPSILON;
 
-      downmix(x, sub, subframe, i*subframe, 0, -2, C);
+      downmix(x, sub, subframe, i*subframe+offset, 0, -2, C);
       if (i==0)
          memx = sub[0];
       for (j=0;j<subframe;j++)
@@ -836,6 +843,12 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
          LM--;
       frame_size = (Fs/400<<LM);
    } else
+#else
+   (void)analysis_pcm;
+   (void)C;
+   (void)bitrate_bps;
+   (void)delay_compensation;
+   (void)downmix;
 #endif
    {
       frame_size = frame_size_select(frame_size, variable_duration, Fs);
@@ -924,7 +937,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
                 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
-                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
+                int analysis_channels, downmix_func downmix, int float_api)
 {
     void *silk_enc;
     CELTEncoder *celt_enc;
@@ -954,9 +968,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     int total_buffer;
     opus_val16 stereo_width;
     const CELTMode *celt_mode;
+#ifndef DISABLE_FLOAT_API
     AnalysisInfo analysis_info;
     int analysis_read_pos_bak=-1;
     int analysis_read_subframe_bak=-1;
+#endif
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -982,9 +998,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 
     lsb_depth = IMIN(lsb_depth, st->lsb_depth);
 
-    analysis_info.valid = 0;
     celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
 #ifndef DISABLE_FLOAT_API
+    analysis_info.valid = 0;
 #ifdef FIXED_POINT
     if (st->silk_mode.complexity >= 10 && st->Fs==48000)
 #else
@@ -997,6 +1013,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
              c1, c2, analysis_channels, st->Fs,
              lsb_depth, downmix, &analysis_info);
     }
+#else
+    (void)analysis_pcm;
+    (void)analysis_size;
 #endif
 
     st->voice_ratio = -1;
@@ -1377,7 +1396,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
              st->user_forced_mode = MODE_CELT_ONLY;
           tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
                 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
-                NULL, 0, c1, c2, analysis_channels, downmix);
+                NULL, 0, c1, c2, analysis_channels, downmix, float_api);
           if (tmp_len<0)
           {
              RESTORE_STACK;
@@ -1424,8 +1443,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ec_enc_init(&enc, data, max_data_bytes-1);
 
     ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
-    for (i=0;i<total_buffer*st->channels;i++)
-       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+    OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
 
     if (st->mode == MODE_CELT_ONLY)
        hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -1444,7 +1462,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     } else {
        dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
     }
-
+#ifndef FIXED_POINT
+    if (float_api)
+    {
+       opus_val32 sum;
+       sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch);
+       /* This should filter out both NaNs and ridiculous signals that could
+          cause NaNs further down. */
+       if (!(sum < 1e9f) || celt_isnan(sum))
+       {
+          OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels);
+          st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
+       }
+    }
+#endif
 
 
     /* SILK processing */
@@ -1599,8 +1630,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
             prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
             gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
                   0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
-            for(i=0;i<prefill_offset;i++)
-               st->delay_buffer[i]=0;
+            OPUS_CLEAR(st->delay_buffer, prefill_offset);
 #ifdef FIXED_POINT
             pcm_silk = st->delay_buffer;
 #else
@@ -1727,15 +1757,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
     if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
     {
-       for (i=0;i<st->channels*st->Fs/400;i++)
-          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+       OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
     }
 
-    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
-        st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
-    for (;i<st->encoder_buffer*st->channels;i++)
-        st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
-
+    if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
+    {
+       OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
+       OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
+             &pcm_buf[0],
+             (frame_size+total_buffer)*st->channels);
+    } else {
+       OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
+    }
     /* gain_fade() and stereo_fade() need to be after the buffer copying
        because we don't want any of this to affect the SILK part */
     if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
@@ -1955,7 +1988,8 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = FLOAT2INT16(pcm[i]);
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
    RESTORE_STACK;
    return ret;
 }
@@ -1977,7 +2011,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
          , st->analysis.subframe_mem
 #endif
          );
-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16,
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
 }
 
 #else
@@ -2002,7 +2037,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = (1.0f/32768)*pcm[i];
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
    RESTORE_STACK;
    return ret;
 }
@@ -2019,7 +2055,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra
          st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
          delay_compensation, downmix_float, st->analysis.subframe_mem);
    return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
-                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
 }
 #endif
 
@@ -2108,7 +2144,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
         case OPUS_SET_MAX_BANDWIDTH_REQUEST:
         {
             opus_int32 value = va_arg(ap, opus_int32);
-            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 
+            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND)
             {
                goto bad_arg;
             }
@@ -2418,11 +2454,14 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
         {
            void *silk_enc;
            silk_EncControlStruct dummy;
+           char *start;
            silk_enc = (char*)st+st->silk_enc_offset;
+#ifndef DISABLE_FLOAT_API
+           tonality_analysis_reset(&st->analysis);
+#endif
 
-           OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START,
-                 sizeof(OpusEncoder)-
-                 ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st));
+           start = (char*)&st->OPUS_ENCODER_RESET_START;
+           OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st));
 
            celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
            silk_InitEncoder( silk_enc, st->arch, &dummy );
diff --git a/media/libopus/src/opus_multistream_decoder.c b/media/libopus/src/opus_multistream_decoder.c
index a05fa1e765..b95eaa6eac 100644
--- a/media/libopus/src/opus_multistream_decoder.c
+++ b/media/libopus/src/opus_multistream_decoder.c
@@ -75,7 +75,7 @@ int opus_multistream_decoder_init(
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
    st->layout.nb_channels = channels;
@@ -119,7 +119,7 @@ OpusMSDecoder *opus_multistream_decoder_create(
    int ret;
    OpusMSDecoder *st;
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
    {
       if (error)
          *error = OPUS_BAD_ARG;
diff --git a/media/libopus/src/opus_multistream_encoder.c b/media/libopus/src/opus_multistream_encoder.c
index 49e27913ee..9e85773573 100644
--- a/media/libopus/src/opus_multistream_encoder.c
+++ b/media/libopus/src/opus_multistream_encoder.c
@@ -41,6 +41,7 @@
 #include "modes.h"
 #include "bands.h"
 #include "quant_bands.h"
+#include "pitch.h"
 
 typedef struct {
    int nb_streams;
@@ -71,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)(
 
 struct OpusMSEncoder {
    ChannelLayout layout;
+   int arch;
    int lfe_stream;
    int application;
    int variable_duration;
@@ -98,7 +100,8 @@ static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
       else
          ptr += align(mono_size);
    }
-   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+   /* void* cast avoids clang -Wcast-align warning */
+   return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
 }
 
 static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
@@ -117,7 +120,8 @@ static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
       else
          ptr += align(mono_size);
    }
-   return (opus_val32*)ptr;
+   /* void* cast avoids clang -Wcast-align warning */
+   return (opus_val32*)(void*)ptr;
 }
 
 static int validate_encoder_layout(const ChannelLayout *layout)
@@ -199,7 +203,7 @@ static opus_val16 logSum(opus_val16 a, opus_val16 b)
       max = b;
       diff = SUB32(EXTEND32(b),EXTEND32(a));
    }
-   if (diff >= QCONST16(8.f, DB_SHIFT))
+   if (!(diff < QCONST16(8.f, DB_SHIFT)))  /* inverted to catch NaNs */
       return max;
 #ifdef FIXED_POINT
    low = SHR32(diff, DB_SHIFT-1);
@@ -218,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
 #endif
 
 void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
-      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
 )
 {
    int c;
@@ -257,7 +261,21 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
       OPUS_COPY(in, mem+c*overlap, overlap);
       (*copy_channel_in)(x, 1, pcm, channels, c, len);
       celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
-      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+#ifndef FIXED_POINT
+      {
+         opus_val32 sum;
+         sum = celt_inner_prod(in, in, frame_size+overlap, 0);
+         /* This should filter out both NaNs and ridiculous signals that could
+            cause NaNs further down. */
+         if (!(sum < 1e9f) || celt_isnan(sum))
+         {
+            OPUS_CLEAR(in, frame_size+overlap);
+            preemph_mem[c] = 0;
+         }
+      }
+#endif
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
+            overlap, celt_mode->maxLM-LM, 1, arch);
       if (upsample != 1)
       {
          int bound = len;
@@ -267,7 +285,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
             freq[i] = 0;
       }
 
-      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, LM);
       amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
       /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
       for (i=1;i<21;i++)
@@ -408,9 +426,10 @@ static int opus_multistream_encoder_init_impl(
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
+   st->arch = opus_select_arch();
    st->layout.nb_channels = channels;
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
@@ -530,7 +549,7 @@ OpusMSEncoder *opus_multistream_encoder_create(
    int ret;
    OpusMSEncoder *st;
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
    {
       if (error)
          *error = OPUS_BAD_ARG;
@@ -566,6 +585,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
 )
 {
    int ret;
+   opus_int32 size;
    OpusMSEncoder *st;
    if ((channels>255) || (channels<1))
    {
@@ -573,7 +593,14 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
          *error = OPUS_BAD_ARG;
       return NULL;
    }
-   st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family));
+   size = opus_multistream_surround_encoder_get_size(channels, mapping_family);
+   if (!size)
+   {
+      if (error)
+         *error = OPUS_UNIMPLEMENTED;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(size);
    if (st==NULL)
    {
       if (error)
@@ -591,7 +618,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
    return st;
 }
 
-static void surround_rate_allocation(
+static opus_int32 surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
       int frame_size
@@ -605,6 +632,7 @@ static void surround_rate_allocation(
    int lfe_offset;
    int coupled_ratio; /* Q8 */
    int lfe_ratio;     /* Q8 */
+   opus_int32 rate_sum=0;
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
@@ -660,7 +688,10 @@ static void surround_rate_allocation(
          rate[i] = stream_offset+channel_rate;
       else
          rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+      rate[i] = IMAX(rate[i], 500);
+      rate_sum += rate[i];
    }
+   return rate_sum;
 }
 
 /* Max size in case the encoder decides to return three frames */
@@ -674,7 +705,8 @@ static int opus_multistream_encode_native
     unsigned char *data,
     opus_int32 max_data_bytes,
     int lsb_depth,
-    downmix_func downmix
+    downmix_func downmix,
+    int float_api
 )
 {
    opus_int32 Fs;
@@ -694,6 +726,8 @@ static int opus_multistream_encode_native
    opus_val32 *mem = NULL;
    opus_val32 *preemph_mem=NULL;
    int frame_size;
+   opus_int32 rate_sum;
+   opus_int32 smallest_packet;
    ALLOC_STACK;
 
    if (st->surround)
@@ -737,6 +771,14 @@ static int opus_multistream_encode_native
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
+
+   /* Smallest packet the encoder can produce. */
+   smallest_packet = st->layout.nb_streams*2-1;
+   if (max_data_bytes < smallest_packet)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
    ALLOC(buf, 2*frame_size, opus_val16);
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
@@ -744,21 +786,23 @@ static int opus_multistream_encode_native
    ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
    if (st->surround)
    {
-      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
-   }
-
-   if (max_data_bytes < 4*st->layout.nb_streams-1)
-   {
-      RESTORE_STACK;
-      return OPUS_BUFFER_TOO_SMALL;
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
-   surround_rate_allocation(st, bitrates, frame_size);
+   rate_sum = surround_rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
-      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
-
+   {
+      if (st->bitrate_bps == OPUS_AUTO)
+      {
+         max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
+      } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
+      {
+         max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet,
+                          3*st->bitrate_bps/(3*8*Fs/frame_size)));
+      }
+   }
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    for (s=0;s<st->layout.nb_streams;s++)
    {
@@ -843,13 +887,15 @@ static int opus_multistream_encode_native
          opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
       /* number of bytes left (+Toc) */
       curr_max = max_data_bytes - tot_size;
-      /* Reserve three bytes for the last stream and four for the others */
-      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
+      /* Reserve one byte for the last stream and two for the others */
+      curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1);
       curr_max = IMIN(curr_max,MS_FRAME_TMP);
+      /* Repacketizer will add one or two bytes for self-delimited frames */
+      if (s != st->layout.nb_streams-1) curr_max -=  curr_max>253 ? 2 : 1;
       if (!vbr && s == st->layout.nb_streams-1)
          opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size)));
       len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
-            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api);
       if (len<0)
       {
          RESTORE_STACK;
@@ -922,7 +968,7 @@ int opus_multistream_encode(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -935,7 +981,7 @@ int opus_multistream_encode_float(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_float);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1);
 }
 #endif
 
@@ -951,7 +997,7 @@ int opus_multistream_encode_float
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 24, downmix_float);
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1);
 }
 
 int opus_multistream_encode(
@@ -963,7 +1009,7 @@ int opus_multistream_encode(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
 }
 #endif
 
diff --git a/media/libopus/src/opus_private.h b/media/libopus/src/opus_private.h
index 83225f2b6c..3b62eed096 100644
--- a/media/libopus/src/opus_private.h
+++ b/media/libopus/src/opus_private.h
@@ -33,6 +33,8 @@
 #include "opus.h"
 #include "celt.h"
 
+#include <stddef.h> /* offsetof */
+
 struct OpusRepacketizer {
    unsigned char toc;
    int nb_frames;
@@ -86,10 +88,6 @@ typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int
 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 
-int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
-                int bitrate, opus_val16 tonality, float *mem, int buffering,
-                downmix_func downmix);
-
 int encode_size(int size, unsigned char *data);
 
 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
@@ -104,16 +102,23 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
       unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
-      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
+      int analysis_channels, downmix_func downmix, int float_api);
 
 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
       opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
       opus_int32 *packet_offset, int soft_clip);
 
-/* Make sure everything's aligned to sizeof(void *) bytes */
+/* Make sure everything is properly aligned. */
 static OPUS_INLINE int align(int i)
 {
-    return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *);
+    struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;};
+
+    unsigned int alignment = offsetof(struct foo, u);
+
+    /* Optimizing compilers should optimize div and multiply into and
+       for all sensible alignment values. */
+    return ((i + alignment - 1) / alignment) * alignment;
 }
 
 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
diff --git a/media/libopus/src/repacketizer.c b/media/libopus/src/repacketizer.c
index a62675ce94..f27e9ab958 100644
--- a/media/libopus/src/repacketizer.c
+++ b/media/libopus/src/repacketizer.c
@@ -219,8 +219,9 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
    }
    if (pad)
    {
-      for (i=ptr-data;i<maxlen;i++)
-         data[i] = 0;
+      /* Fill padding with zeros. */
+      while (ptr<data+maxlen)
+         *ptr++=0;
    }
    return tot_size;
 }
diff --git a/media/libopus/update.sh b/media/libopus/update.sh
index 8676b1b80c..b36391b052 100755
--- a/media/libopus/update.sh
+++ b/media/libopus/update.sh
@@ -74,4 +74,5 @@ sed -e "s/DEFINES\['OPUS_VERSION'\][ \t]*=[ \t]*'\".*\"'/DEFINES['OPUS_VERSION']
 python gen-sources.py $1
 
 # apply outstanding local patches
-patch -p3 < ./gcc-4.8-ICE.patch
+patch -p3 < gcc-4.8-ICE.patch
+patch -p3 < nonunified.patch
diff --git a/media/libvpx/vp8/common/postproc.c b/media/libvpx/vp8/common/postproc.c
index 3b05bc63e7..a4e6ae170c 100644
--- a/media/libvpx/vp8/common/postproc.c
+++ b/media/libvpx/vp8/common/postproc.c
@@ -330,7 +330,7 @@ void vp8_deblock(VP8_COMMON                 *cm,
     double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
     int ppl = (int)(level + .5);
 
-    const MODE_INFO *mode_info_context = cm->mi;
+    const MODE_INFO *mode_info_context = cm->show_frame_mi;
     int mbr, mbc;
 
     /* The pixel thresholds are adjusted according to if or not the macroblock
diff --git a/media/libyuv/source/compare_win.cc b/media/libyuv/source/compare_win.cc
index 99831651f5..522e2989d2 100644
--- a/media/libyuv/source/compare_win.cc
+++ b/media/libyuv/source/compare_win.cc
@@ -18,7 +18,7 @@ extern "C" {
 
 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
   __asm {
     mov        eax, [esp + 4]    // src_a
@@ -60,7 +60,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
 #if _MSC_VER >= 1700
 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
 #pragma warning(disable: 4752)
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
   __asm {
     mov        eax, [esp + 4]    // src_a
@@ -135,7 +135,7 @@ static uvec32 kHashMul3 = {
 #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
     _asm _emit 0x40 _asm _emit reg
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
   __asm {
     mov        eax, [esp + 4]    // src
@@ -187,7 +187,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
 
 // Visual C 2012 required for AVX2.
 #if _MSC_VER >= 1700
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
   __asm {
     mov        eax, [esp + 4]    // src
diff --git a/media/libyuv/source/rotate.cc b/media/libyuv/source/rotate.cc
index b052ac1dc4..d130c2db93 100644
--- a/media/libyuv/source/rotate.cc
+++ b/media/libyuv/source/rotate.cc
@@ -76,7 +76,7 @@ void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
 #if !defined(LIBYUV_DISABLE_X86) && \
     defined(_M_IX86) && defined(_MSC_VER)
 #define HAS_TRANSPOSE_WX8_SSSE3
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                                uint8* dst, int dst_stride, int width) {
   __asm {
@@ -168,7 +168,7 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
 }
 
 #define HAS_TRANSPOSE_UVWX8_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                 uint8* dst_a, int dst_stride_a,
                                 uint8* dst_b, int dst_stride_b,
diff --git a/media/libyuv/source/row_win.cc b/media/libyuv/source/row_win.cc
index f13e4d7ae5..ecf08b32cc 100644
--- a/media/libyuv/source/row_win.cc
+++ b/media/libyuv/source/row_win.cc
@@ -144,7 +144,7 @@ static const uvec8 kShuffleMaskARGBToRAW_0 = {
 };
 
 // Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
   __asm {
     mov        eax, [esp + 4]        // src_y
@@ -172,7 +172,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
                                   int pix) {
   __asm {
@@ -201,7 +201,7 @@ void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_rgb24
@@ -240,7 +240,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
                         int pix) {
   __asm {
@@ -287,7 +287,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
 // v * (256 + 8)
 // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
 // 20 instructions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
                           int pix) {
   __asm {
@@ -338,7 +338,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
 }
 
 // 24 instructions
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
                             int pix) {
   __asm {
@@ -392,7 +392,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
 }
 
 // 18 instructions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
                             int pix) {
   __asm {
@@ -431,7 +431,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_argb
@@ -470,7 +470,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_argb
@@ -509,7 +509,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_argb
@@ -548,7 +548,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
 }
 
 // TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_argb
@@ -590,7 +590,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
   __asm {
     mov       eax, [esp + 4]   // src_argb
@@ -621,7 +621,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
 }
 
 // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -656,7 +656,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 }
 
 // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -693,7 +693,7 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 
 #ifdef HAS_ARGBTOYROW_AVX2
 // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
+__declspec(naked)
 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -733,7 +733,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
 
 #ifdef HAS_ARGBTOYROW_AVX2
 // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
+__declspec(naked)
 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -773,7 +773,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
 }
 #endif  //  HAS_ARGBTOYJROW_AVX2
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -807,7 +807,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -842,7 +842,7 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -876,7 +876,7 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -910,7 +910,7 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -944,7 +944,7 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -978,7 +978,7 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -1012,7 +1012,7 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -1046,7 +1046,7 @@ void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1112,7 +1112,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                         uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1180,7 +1180,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 }
 
 #ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) __declspec(align(32))
+__declspec(naked)
 void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1246,7 +1246,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
 }
 #endif  // HAS_ARGBTOUVROW_AVX2
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1316,7 +1316,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1387,7 +1387,7 @@ void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
                           uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1445,7 +1445,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
                                     uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1503,7 +1503,7 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
                           uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1562,7 +1562,7 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
                                     uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1621,7 +1621,7 @@ void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1687,7 +1687,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1757,7 +1757,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1823,7 +1823,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1893,7 +1893,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -1959,7 +1959,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   __asm {
@@ -2077,7 +2077,7 @@ static const lvec16 kUVBiasR_AVX = {
 
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToARGBRow_AVX2(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2276,7 +2276,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
 
 // 8 pixels, dest aligned 16.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I444ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2319,7 +2319,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToRGB24Row_SSSE3(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
@@ -2366,7 +2366,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToRAWRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
@@ -2413,7 +2413,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest unaligned.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToRGB565Row_SSSE3(const uint8* y_buf,
                            const uint8* u_buf,
                            const uint8* v_buf,
@@ -2486,7 +2486,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2530,7 +2530,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
 // 8 pixels, dest aligned 16.
 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 // Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I411ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2575,7 +2575,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* dst_argb,
@@ -2613,7 +2613,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void NV21ToARGBRow_SSSE3(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* dst_argb,
@@ -2651,7 +2651,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
 
 // 8 pixels, unaligned.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -2694,7 +2694,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
 
 // 8 pixels, unaligned.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -2738,7 +2738,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
 // 8 pixels, unaligned.
 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 // Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -2783,7 +2783,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* uv_buf,
                                    uint8* dst_argb,
@@ -2821,7 +2821,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
 
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* uv_buf,
                                    uint8* dst_argb,
@@ -2857,7 +2857,7 @@ void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToBGRARow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2898,7 +2898,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -2939,7 +2939,7 @@ void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToABGRRow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -2980,7 +2980,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -3021,7 +3021,7 @@ void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToRGBARow_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
@@ -3062,7 +3062,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -3106,7 +3106,7 @@ void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
 #endif  // HAS_I422TOARGBROW_SSSE3
 
 #ifdef HAS_YTOARGBROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YToARGBRow_SSE2(const uint8* y_buf,
                      uint8* rgb_buf,
                      int width) {
@@ -3159,7 +3159,7 @@ static const uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   __asm {
     mov       eax, [esp + 4]   // src
@@ -3188,7 +3188,7 @@ static const ulvec8 kShuffleMirror_AVX2 = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov       eax, [esp + 4]   // src
@@ -3215,7 +3215,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
 #ifdef HAS_MIRRORROW_SSE2
 // SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
 // version can not.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov       eax, [esp + 4]   // src
@@ -3248,7 +3248,7 @@ static const uvec8 kShuffleMirrorUV = {
   14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
                        int width) {
   __asm {
@@ -3284,7 +3284,7 @@ static const uvec8 kARGBShuffleMirror = {
   12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   __asm {
     mov       eax, [esp + 4]   // src
@@ -3313,7 +3313,7 @@ static const ulvec32 kARGBShuffleMirror_AVX2 = {
   7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov       eax, [esp + 4]   // src
@@ -3336,7 +3336,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
 #endif  // HAS_ARGBMIRRORROW_AVX2
 
 #ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
     push       edi
@@ -3372,7 +3372,7 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                                int pix) {
   __asm {
@@ -3411,7 +3411,7 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
 #endif  // HAS_SPLITUVROW_SSE2
 
 #ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
     push       edi
@@ -3450,7 +3450,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
 #endif  // HAS_SPLITUVROW_AVX2
 
 #ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                      int width) {
   __asm {
@@ -3480,7 +3480,7 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
                                uint8* dst_uv, int width) {
   __asm {
@@ -3512,7 +3512,7 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
 #endif  //  HAS_MERGEUVROW_SSE2
 
 #ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                      int width) {
   __asm {
@@ -3547,7 +3547,7 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
 
 #ifdef HAS_COPYROW_SSE2
 // CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
   __asm {
     mov        eax, [esp + 4]   // src
@@ -3570,7 +3570,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
 #endif  // HAS_COPYROW_SSE2
 
 // Unaligned Multiple of 1.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
   __asm {
     mov        eax, esi
@@ -3586,7 +3586,7 @@ void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
 }
 
 #ifdef HAS_COPYROW_X86
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void CopyRow_X86(const uint8* src, uint8* dst, int count) {
   __asm {
     mov        eax, esi
@@ -3605,7 +3605,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) {
 
 #ifdef HAS_ARGBCOPYALPHAROW_SSE2
 // width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov        eax, [esp + 4]   // src
@@ -3642,7 +3642,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
 
 #ifdef HAS_ARGBCOPYALPHAROW_AVX2
 // width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov        eax, [esp + 4]   // src
@@ -3672,7 +3672,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
 
 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
 // width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov        eax, [esp + 4]   // src
@@ -3711,7 +3711,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
 
 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
 // width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
   __asm {
     mov        eax, [esp + 4]   // src
@@ -3743,7 +3743,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
 
 #ifdef HAS_SETROW_X86
 // SetRow8 writes 'count' bytes using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SetRow_X86(uint8* dst, uint32 v32, int count) {
   __asm {
     mov        edx, edi
@@ -3758,7 +3758,7 @@ void SetRow_X86(uint8* dst, uint32 v32, int count) {
 }
 
 // SetRow32 writes 'count' words using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
                    int dst_stride, int height) {
   __asm {
@@ -3790,7 +3790,7 @@ void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
 #endif  // HAS_SETROW_X86
 
 #ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToYRow_AVX2(const uint8* src_yuy2,
                      uint8* dst_y, int pix) {
   __asm {
@@ -3818,7 +3818,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
                       uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -3863,7 +3863,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
                          uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -3903,7 +3903,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToYRow_AVX2(const uint8* src_uyvy,
                      uint8* dst_y, int pix) {
   __asm {
@@ -3929,7 +3929,7 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
                       uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -3974,7 +3974,7 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
                          uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4016,7 +4016,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
 #endif  // HAS_YUY2TOYROW_AVX2
 
 #ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToYRow_SSE2(const uint8* src_yuy2,
                      uint8* dst_y, int pix) {
   __asm {
@@ -4042,7 +4042,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
                       uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4086,7 +4086,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
                          uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4123,7 +4123,7 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
                                uint8* dst_y, int pix) {
   __asm {
@@ -4149,7 +4149,7 @@ void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
                                 uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4193,7 +4193,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
                                    uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4230,7 +4230,7 @@ void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToYRow_SSE2(const uint8* src_uyvy,
                      uint8* dst_y, int pix) {
   __asm {
@@ -4254,7 +4254,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
                       uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4298,7 +4298,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
                          uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4335,7 +4335,7 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
                                uint8* dst_y, int pix) {
   __asm {
@@ -4359,7 +4359,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
                                 uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4403,7 +4403,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
                                    uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
@@ -4443,7 +4443,7 @@ void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
 
 #ifdef HAS_ARGBBLENDROW_SSE2
 // Blend 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                        uint8* dst_argb, int width) {
   __asm {
@@ -4577,7 +4577,7 @@ static const uvec8 kShuffleAlpha = {
 //    pshufb     xmm3, kShuffleAlpha // alpha
 // Blend 8 pixels at a time.
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
                         uint8* dst_argb, int width) {
   __asm {
@@ -4725,7 +4725,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
 #ifdef HAS_ARGBATTENUATEROW_SSE2
 // Attenuate 4 pixels at a time.
 // Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4]   // src_argb0
@@ -4775,7 +4775,7 @@ static const uvec8 kShuffleAlpha1 = {
   11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
   15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
 };
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4]   // src_argb0
@@ -4823,7 +4823,7 @@ static const ulvec8 kShuffleAlpha_AVX2 = {
   6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
   14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
 };
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4]   // src_argb0
@@ -4862,7 +4862,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
 #ifdef HAS_ARGBUNATTENUATEROW_SSE2
 // Unattenuate 4 pixels at a time.
 // Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
                              int width) {
   __asm {
@@ -4918,7 +4918,7 @@ static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
 // TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
 // USE_GATHER is not on by default, due to being a slow instruction.
 #ifdef USE_GATHER
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
                              int width) {
   __asm {
@@ -4953,7 +4953,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
   }
 }
 #else  // USE_GATHER
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
                              int width) {
   __asm {
@@ -5021,7 +5021,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
 
 #ifdef HAS_ARGBGRAYROW_SSSE3
 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4]   /* src_argb */
@@ -5081,7 +5081,7 @@ static const vec8 kARGBToSepiaR = {
 };
 
 // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4]   /* dst_argb */
@@ -5139,7 +5139,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
 // Same as Sepia except matrix is provided.
 // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
 // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
                               const int8* matrix_argb, int width) {
   __asm {
@@ -5202,7 +5202,7 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
 #ifdef HAS_ARGBQUANTIZEROW_SSE2
 // Quantize 4 ARGB pixels (16 bytes).
 // Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
                           int interval_offset, int width) {
   __asm {
@@ -5249,7 +5249,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
 #ifdef HAS_ARGBSHADEROW_SSE2
 // Shade 4 pixels at a time by specified value.
 // Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
                        uint32 value) {
   __asm {
@@ -5284,7 +5284,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
 
 #ifdef HAS_ARGBMULTIPLYROW_SSE2
 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                           uint8* dst_argb, int width) {
   __asm {
@@ -5324,7 +5324,7 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
 #ifdef HAS_ARGBADDROW_SSE2
 // Add 2 rows of ARGB pixels together, 4 pixels at a time.
 // TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                      uint8* dst_argb, int width) {
   __asm {
@@ -5373,7 +5373,7 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
 
 #ifdef HAS_ARGBSUBTRACTROW_SSE2
 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                           uint8* dst_argb, int width) {
   __asm {
@@ -5403,7 +5403,7 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
 
 #ifdef HAS_ARGBMULTIPLYROW_AVX2
 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
                           uint8* dst_argb, int width) {
   __asm {
@@ -5441,7 +5441,7 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
 
 #ifdef HAS_ARGBADDROW_AVX2
 // Add 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
                      uint8* dst_argb, int width) {
   __asm {
@@ -5471,7 +5471,7 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
 
 #ifdef HAS_ARGBSUBTRACTROW_AVX2
 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
                           uint8* dst_argb, int width) {
   __asm {
@@ -5504,7 +5504,7 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
 // -1  0  1
 // -2  0  2
 // -1  0  1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
                     const uint8* src_y2, uint8* dst_sobelx, int width) {
   __asm {
@@ -5561,7 +5561,7 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
 // -1 -2 -1
 //  0  0  0
 //  1  2  1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
                     uint8* dst_sobely, int width) {
   __asm {
@@ -5615,7 +5615,7 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
 // R = Sobel
 // G = Sobel
 // B = Sobel
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
                    uint8* dst_argb, int width) {
   __asm {
@@ -5663,7 +5663,7 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
 
 #ifdef HAS_SOBELTOPLANEROW_SSE2
 // Adds Sobel X and Sobel Y and stores Sobel into a plane.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
                           uint8* dst_y, int width) {
   __asm {
@@ -5697,7 +5697,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
 // R = Sobel X
 // G = Sobel
 // B = Sobel Y
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
                      uint8* dst_argb, int width) {
   __asm {
@@ -5991,7 +5991,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
 
 #ifdef HAS_ARGBAFFINEROW_SSE2
 // Copy ARGB pixels from source image with slope to a row of destination.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 LIBYUV_API
 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
                         uint8* dst_argb, const float* uv_dudv, int width) {
@@ -6078,7 +6078,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
 
 #ifdef HAS_INTERPOLATEROW_AVX2
 // Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
                           ptrdiff_t src_stride, int dst_width,
                           int source_y_fraction) {
@@ -6179,7 +6179,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
 
 #ifdef HAS_INTERPOLATEROW_SSSE3
 // Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                           ptrdiff_t src_stride, int dst_width,
                           int source_y_fraction) {
@@ -6286,7 +6286,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 
 #ifdef HAS_INTERPOLATEROW_SSE2
 // Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
                          ptrdiff_t src_stride, int dst_width,
                          int source_y_fraction) {
@@ -6398,7 +6398,7 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
 #endif  // HAS_INTERPOLATEROW_SSE2
 
 // Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                                     ptrdiff_t src_stride, int dst_width,
                                     int source_y_fraction) {
@@ -6504,7 +6504,7 @@ void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 
 #ifdef HAS_INTERPOLATEROW_SSE2
 // Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
                                    ptrdiff_t src_stride, int dst_width,
                                    int source_y_fraction) {
@@ -6615,7 +6615,7 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
 }
 #endif  // HAS_INTERPOLATEROW_SSE2
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int pix) {
   __asm {
@@ -6640,7 +6640,7 @@ void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 }
 
 #ifdef HAS_HALFROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int pix) {
   __asm {
@@ -6667,7 +6667,7 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
 }
 #endif  // HAS_HALFROW_AVX2
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                           uint32 selector, int pix) {
   __asm {
@@ -6694,7 +6694,7 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
 }
 
 // Specialized ARGB to Bayer that just isolates G channel.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
                            uint32 selector, int pix) {
   __asm {
@@ -6725,7 +6725,7 @@ void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
 }
 
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
                           const uint8* shuffler, int pix) {
   __asm {
@@ -6751,7 +6751,7 @@ void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
                                     const uint8* shuffler, int pix) {
   __asm {
@@ -6778,7 +6778,7 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
 }
 
 #ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
                          const uint8* shuffler, int pix) {
   __asm {
@@ -6807,7 +6807,7 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
 }
 #endif  // HAS_ARGBSHUFFLEROW_AVX2
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
                          const uint8* shuffler, int pix) {
   __asm {
@@ -6933,7 +6933,7 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
 // UYVY - Macro-pixel = 2 image pixels
 // U0Y0V0Y1
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToYUY2Row_SSE2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
@@ -6971,7 +6971,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
   }
 }
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void I422ToUYVYRow_SSE2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
@@ -7010,7 +7010,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
 }
 
 #ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
                             uint8* dst_argb, const float* poly,
                             int width) {
@@ -7070,7 +7070,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
 #endif  // HAS_ARGBPOLYNOMIALROW_SSE2
 
 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
                             uint8* dst_argb, const float* poly,
                             int width) {
@@ -7111,7 +7111,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_ARGBCOLORTABLEROW_X86
 // Tranform ARGB pixels with color table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
                            int width) {
   __asm {
@@ -7146,7 +7146,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
 
 #ifdef HAS_RGBCOLORTABLEROW_X86
 // Tranform RGB pixels with color table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
   __asm {
     push       esi
@@ -7178,7 +7178,7 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
 
 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
 // Tranform RGB pixels with luma table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
                                  int width,
                                  const uint8* luma, uint32 lumacoeff) {
diff --git a/media/libyuv/source/scale_win.cc b/media/libyuv/source/scale_win.cc
index 840b9738da..cfeaefc725 100644
--- a/media/libyuv/source/scale_win.cc
+++ b/media/libyuv/source/scale_win.cc
@@ -94,7 +94,7 @@ static uvec16 kScaleAb2 =
 
 // Reads 32 pixels, throws half away and writes 16 pixels.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                         uint8* dst_ptr, int dst_width) {
   __asm {
@@ -122,7 +122,7 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Blends 32x1 rectangle to 16x1.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                               uint8* dst_ptr, int dst_width) {
   __asm {
@@ -160,7 +160,7 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Blends 32x2 rectangle to 16x1.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
   __asm {
@@ -204,7 +204,7 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Reads 32 pixels, throws half away and writes 16 pixels.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint8* dst_ptr, int dst_width) {
@@ -233,7 +233,7 @@ void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
 
 // Blends 32x1 rectangle to 16x1.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
                                         ptrdiff_t src_stride,
                                         uint8* dst_ptr, int dst_width) {
@@ -272,7 +272,7 @@ void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
 
 // Blends 32x2 rectangle to 16x1.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
                                      ptrdiff_t src_stride,
                                      uint8* dst_ptr, int dst_width) {
@@ -317,7 +317,7 @@ void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
 
 // Point samples 32 pixels to 8 pixels.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                         uint8* dst_ptr, int dst_width) {
   __asm {
@@ -350,7 +350,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Blends 32x4 rectangle to 8x1.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
   __asm {
@@ -415,7 +415,7 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Note that movdqa+palign may be better than movdqu.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width) {
   __asm {
@@ -464,7 +464,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 
 // Note that movdqa+palign may be better than movdqu.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8* dst_ptr, int dst_width) {
@@ -523,7 +523,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 
 // Note that movdqa+palign may be better than movdqu.
 // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8* dst_ptr, int dst_width) {
@@ -586,7 +586,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 // 3/8 point sampler
 
 // Scale 32 pixels to 12
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width) {
   __asm {
@@ -618,7 +618,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 }
 
 // Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8* dst_ptr, int dst_width) {
@@ -684,7 +684,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 }
 
 // Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8* dst_ptr, int dst_width) {
@@ -730,7 +730,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
 
 // Reads 16xN bytes and produces 16 shorts at a time.
 // TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int src_width,
                        int src_height) {
@@ -800,7 +800,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 // when drmemory bug fixed.
 // https://code.google.com/p/drmemory/issues/detail?id=1396
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                            int dst_width, int x, int dx) {
   __asm {
@@ -881,7 +881,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 
 // Reads 16 pixels, duplicates them and writes 32 pixels.
 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
                        int dst_width, int x, int dx) {
   __asm {
@@ -908,7 +908,7 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
 
 // Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
                             ptrdiff_t src_stride,
                             uint8* dst_argb, int dst_width) {
@@ -935,7 +935,7 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
 
 // Blends 8x1 rectangle to 4x1.
 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
                                   ptrdiff_t src_stride,
                                   uint8* dst_argb, int dst_width) {
@@ -965,7 +965,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
 
 // Blends 8x2 rectangle to 4x1.
 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
                                ptrdiff_t src_stride,
                                uint8* dst_argb, int dst_width) {
@@ -1001,7 +1001,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 
 // Reads 4 pixels at a time.
 // Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
                                int src_stepx,
                                uint8* dst_argb, int dst_width) {
@@ -1039,7 +1039,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
 
 // Blends four 2x2 to 4x1.
 // Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
                                   ptrdiff_t src_stride,
                                   int src_stepx,
@@ -1088,7 +1088,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 }
 
 // Column scaling unfiltered. SSE2 version.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
                         int dst_width, int x, int dx) {
   __asm {
@@ -1182,7 +1182,7 @@ static uvec8 kShuffleFractions = {
   0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
 };
 
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
                                int dst_width, int x, int dx) {
   __asm {
@@ -1257,7 +1257,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
 
 // Reads 4 pixels, duplicates them and writes 8 pixels.
 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
                            int dst_width, int x, int dx) {
   __asm {
@@ -1283,7 +1283,7 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
 }
 
 // Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 int FixedDiv_X86(int num, int div) {
   __asm {
     mov        eax, [esp + 4]    // num
@@ -1296,7 +1296,7 @@ int FixedDiv_X86(int num, int div) {
 }
 
 // Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 int FixedDiv1_X86(int num, int div) {
   __asm {
     mov        eax, [esp + 4]    // num
diff --git a/netwerk/base/BackgroundFileSaver.cpp b/netwerk/base/BackgroundFileSaver.cpp
index 9d18e13817..b48fb724b6 100644
--- a/netwerk/base/BackgroundFileSaver.cpp
+++ b/netwerk/base/BackgroundFileSaver.cpp
@@ -1198,6 +1198,10 @@ DigestOutputStream::DigestOutputStream(nsIOutputStream* aStream,
 
 DigestOutputStream::~DigestOutputStream()
 {
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return;
+  }
   shutdown(calledFromObject);
 }
 
@@ -1252,5 +1256,7 @@ DigestOutputStream::IsNonBlocking(bool *retval)
   return mOutputStream->IsNonBlocking(retval);
 }
 
+#undef LOG_ENABLED
+
 } // namespace net
 } // namespace mozilla
diff --git a/netwerk/base/nsSocketTransportService2.h b/netwerk/base/nsSocketTransportService2.h
index 033ce1368b..35b2ac6aba 100644
--- a/netwerk/base/nsSocketTransportService2.h
+++ b/netwerk/base/nsSocketTransportService2.h
@@ -81,10 +81,7 @@ public:
     NS_DECL_NSITHREADOBSERVER
     NS_DECL_NSIRUNNABLE
     NS_DECL_NSIOBSERVER 
-    // missing from NS_DECL_NSIEVENTTARGET because MSVC
-    nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-      return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-    }
+    using nsIEventTarget::Dispatch;
 
     nsSocketTransportService();
 
diff --git a/netwerk/base/nsStreamTransportService.h b/netwerk/base/nsStreamTransportService.h
index 723bb3df9a..61fe8e55dc 100644
--- a/netwerk/base/nsStreamTransportService.h
+++ b/netwerk/base/nsStreamTransportService.h
@@ -21,10 +21,7 @@ public:
     NS_DECL_NSISTREAMTRANSPORTSERVICE
     NS_DECL_NSIEVENTTARGET
     NS_DECL_NSIOBSERVER
-    // missing from NS_DECL_NSIEVENTTARGET because MSVC
-    nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-      return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-    }
+    using nsIEventTarget::Dispatch;
 
     nsresult Init();
 
diff --git a/netwerk/base/nsURLHelper.cpp b/netwerk/base/nsURLHelper.cpp
index 69598dbe7d..9871e05eca 100644
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -436,7 +436,7 @@ net_ResolveRelativePath(const nsACString &relativePath,
           case '#':
           case '?':
             stop = true;
-            // fall through...
+            MOZ_FALLTHROUGH;
           case '/':
             // delimiter found
             if (name.EqualsLiteral("..")) {
diff --git a/netwerk/cache/nsDiskCacheDevice.cpp b/netwerk/cache/nsDiskCacheDevice.cpp
index cf77c65833..b224ed0097 100644
--- a/netwerk/cache/nsDiskCacheDevice.cpp
+++ b/netwerk/cache/nsDiskCacheDevice.cpp
@@ -303,17 +303,17 @@ nsDiskCache::Hash(const char * key, PLDHashNumber initval)
   /*------------------------------------- handle the last 11 bytes */
   c += length;
   switch(len) {              /* all the case statements fall through */
-    case 11: c += (uint32_t(k[10])<<24);
-    case 10: c += (uint32_t(k[9])<<16);
-    case 9 : c += (uint32_t(k[8])<<8);
+    case 11: c += (uint32_t(k[10])<<24);  MOZ_FALLTHROUGH;
+    case 10: c += (uint32_t(k[9])<<16);   MOZ_FALLTHROUGH;
+    case 9 : c += (uint32_t(k[8])<<8);    MOZ_FALLTHROUGH;
     /* the low-order byte of c is reserved for the length */
-    case 8 : b += (uint32_t(k[7])<<24);
-    case 7 : b += (uint32_t(k[6])<<16);
-    case 6 : b += (uint32_t(k[5])<<8);
-    case 5 : b += k[4];
-    case 4 : a += (uint32_t(k[3])<<24);
-    case 3 : a += (uint32_t(k[2])<<16);
-    case 2 : a += (uint32_t(k[1])<<8);
+    case 8 : b += (uint32_t(k[7])<<24);   MOZ_FALLTHROUGH;
+    case 7 : b += (uint32_t(k[6])<<16);   MOZ_FALLTHROUGH;
+    case 6 : b += (uint32_t(k[5])<<8);    MOZ_FALLTHROUGH;
+    case 5 : b += k[4];                   MOZ_FALLTHROUGH;
+    case 4 : a += (uint32_t(k[3])<<24);   MOZ_FALLTHROUGH;
+    case 3 : a += (uint32_t(k[2])<<16);   MOZ_FALLTHROUGH;
+    case 2 : a += (uint32_t(k[1])<<8);    MOZ_FALLTHROUGH;
     case 1 : a += k[0];
     /* case 0: nothing left to add */
   }
@@ -1145,4 +1145,3 @@ nsDiskCacheDevice::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf)
 
     return usage;
 }
-
diff --git a/netwerk/cache2/CacheHashUtils.cpp b/netwerk/cache2/CacheHashUtils.cpp
index 8e4eda56b5..4dc298bf0a 100644
--- a/netwerk/cache2/CacheHashUtils.cpp
+++ b/netwerk/cache2/CacheHashUtils.cpp
@@ -56,17 +56,17 @@ CacheHash::Hash(const char *aData, uint32_t aSize, uint32_t aInitval)
   /*------------------------------------- handle the last 11 bytes */
   c += aSize;
   switch(len) {              /* all the case statements fall through */
-    case 11: c += (uint32_t(k[10])<<24);
-    case 10: c += (uint32_t(k[9])<<16);
-    case 9 : c += (uint32_t(k[8])<<8);
+    case 11: c += (uint32_t(k[10])<<24);  MOZ_FALLTHROUGH;
+    case 10: c += (uint32_t(k[9])<<16);   MOZ_FALLTHROUGH;
+    case 9 : c += (uint32_t(k[8])<<8);    MOZ_FALLTHROUGH;
     /* the low-order byte of c is reserved for the length */
-    case 8 : b += (uint32_t(k[7])<<24);
-    case 7 : b += (uint32_t(k[6])<<16);
-    case 6 : b += (uint32_t(k[5])<<8);
-    case 5 : b += k[4];
-    case 4 : a += (uint32_t(k[3])<<24);
-    case 3 : a += (uint32_t(k[2])<<16);
-    case 2 : a += (uint32_t(k[1])<<8);
+    case 8 : b += (uint32_t(k[7])<<24);   MOZ_FALLTHROUGH;
+    case 7 : b += (uint32_t(k[6])<<16);   MOZ_FALLTHROUGH;
+    case 6 : b += (uint32_t(k[5])<<8);    MOZ_FALLTHROUGH;
+    case 5 : b += k[4];                   MOZ_FALLTHROUGH;
+    case 4 : a += (uint32_t(k[3])<<24);   MOZ_FALLTHROUGH;
+    case 3 : a += (uint32_t(k[2])<<16);   MOZ_FALLTHROUGH;
+    case 2 : a += (uint32_t(k[1])<<8);    MOZ_FALLTHROUGH;
     case 1 : a += k[0];
     /* case 0: nothing left to add */
   }
@@ -156,8 +156,8 @@ CacheHash::Update(const char *aData, uint32_t aLen)
   }
 
   switch (aLen) {
-    case 3: mBuf += data[2] << 16;
-    case 2: mBuf += data[1] << 8;
+    case 3: mBuf += data[2] << 16;  MOZ_FALLTHROUGH;
+    case 2: mBuf += data[1] << 8;   MOZ_FALLTHROUGH;
     case 1: mBuf += data[0];
   }
 
@@ -189,4 +189,3 @@ CacheHash::GetHash16()
 
 } // namespace net
 } // namespace mozilla
-
diff --git a/netwerk/cache2/CacheIndex.cpp b/netwerk/cache2/CacheIndex.cpp
index d6da7030a7..bbfe94a6f7 100644
--- a/netwerk/cache2/CacheIndex.cpp
+++ b/netwerk/cache2/CacheIndex.cpp
@@ -127,7 +127,7 @@ private:
         if (!mDoNotSearchInUpdates) {
           entry = mIndex->mPendingUpdates.GetEntry(*mHash);
         }
-        // no break
+        MOZ_FALLTHROUGH;
       case CacheIndex::BUILDING:
       case CacheIndex::UPDATING:
       case CacheIndex::READY:
@@ -430,7 +430,7 @@ CacheIndex::Shutdown()
   switch (oldState) {
     case WRITING:
       index->FinishWrite(false);
-      // no break
+      MOZ_FALLTHROUGH;
     case READY:
       if (index->mIndexOnDiskIsValid && !index->mDontMarkIndexClean) {
         if (!sanitize && NS_FAILED(index->WriteLogToDisk())) {
@@ -1125,7 +1125,7 @@ CacheIndex::HasEntry(const SHA1Sum::Hash &hash, EntryStatus *_retval, bool *_pin
     case READING:
     case WRITING:
       entry = index->mPendingUpdates.GetEntry(hash);
-      // no break
+      MOZ_FALLTHROUGH;
     case BUILDING:
     case UPDATING:
     case READY:
diff --git a/netwerk/cache2/CacheStorageService.cpp b/netwerk/cache2/CacheStorageService.cpp
index 0cf532a9d9..a6cfa0e7f0 100644
--- a/netwerk/cache2/CacheStorageService.cpp
+++ b/netwerk/cache2/CacheStorageService.cpp
@@ -420,7 +420,7 @@ private:
         }
 
         mPass = ITERATE_METADATA;
-        // no break
+        MOZ_FALLTHROUGH;
 
       case ITERATE_METADATA:
         // Now grab the context iterator.
diff --git a/netwerk/cookie/nsCookieService.cpp b/netwerk/cookie/nsCookieService.cpp
index 1f124da304..8cb899a7b1 100644
--- a/netwerk/cookie/nsCookieService.cpp
+++ b/netwerk/cookie/nsCookieService.cpp
@@ -989,6 +989,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
         NS_ENSURE_SUCCESS(rv, RESULT_RETRY);
       }
       // Fall through to the next upgrade.
+      MOZ_FALLTHROUGH;
 
     case 2:
       {
@@ -1047,6 +1048,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
         NS_ENSURE_SUCCESS(rv, RESULT_RETRY);
       }
       // Fall through to the next upgrade.
+      MOZ_FALLTHROUGH;
 
     case 3:
       {
@@ -1143,6 +1145,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
         NS_ENSURE_SUCCESS(rv, RESULT_RETRY);
       }
       // Fall through to the next upgrade.
+      MOZ_FALLTHROUGH;
 
     case 4:
       {
@@ -1190,6 +1193,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
           ("Upgraded database to schema version 5"));
       }
       // Fall through to the next upgrade.
+      MOZ_FALLTHROUGH;
 
     case 5:
       {
@@ -1255,6 +1259,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
         COOKIE_LOGSTRING(LogLevel::Debug,
           ("Upgraded database to schema version 6"));
       }
+      MOZ_FALLTHROUGH;
 
     case 6:
       {
@@ -1314,6 +1319,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
       // No more upgrades. Update the schema version.
       rv = mDefaultDBState->dbConn->SetSchemaVersion(COOKIES_SCHEMA_VERSION);
       NS_ENSURE_SUCCESS(rv, RESULT_RETRY);
+      MOZ_FALLTHROUGH;
 
     case COOKIES_SCHEMA_VERSION:
       break;
@@ -1321,7 +1327,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
     case 0:
       {
         NS_WARNING("couldn't get schema version!");
-          
+
         // the table may be usable; someone might've just clobbered the schema
         // version. we can treat this case like a downgrade using the codepath
         // below, by verifying the columns we care about are all there. for now,
@@ -1331,6 +1337,7 @@ nsCookieService::TryInitDB(bool aRecreateDB)
         NS_ENSURE_SUCCESS(rv, RESULT_RETRY);
       }
       // fall through to downgrade check
+      MOZ_FALLTHROUGH;
 
     // downgrading.
     // if columns have been added to the table, we can still use the ones we
diff --git a/netwerk/protocol/http/Http2Stream.cpp b/netwerk/protocol/http/Http2Stream.cpp
index 66f69d5d13..29d244b3b4 100644
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@@ -1339,7 +1339,7 @@ Http2Stream::OnReadSegment(const char *buf,
     mRequestBodyLenRemaining -= dataLength;
     GenerateDataFrameHeader(dataLength, !mRequestBodyLenRemaining);
     ChangeState(SENDING_BODY);
-    // NO BREAK
+    MOZ_FALLTHROUGH;
 
   case SENDING_BODY:
     MOZ_ASSERT(mTxInlineFrameUsed, "OnReadSegment Send Data Header 0b");
@@ -1453,4 +1453,3 @@ Http2Stream::MapStreamToHttpConnection()
 
 } // namespace net
 } // namespace mozilla
-
diff --git a/netwerk/protocol/http/SpdyStream31.cpp b/netwerk/protocol/http/SpdyStream31.cpp
index d5d40da54e..d80e61790d 100644
--- a/netwerk/protocol/http/SpdyStream31.cpp
+++ b/netwerk/protocol/http/SpdyStream31.cpp
@@ -1529,7 +1529,7 @@ SpdyStream31::OnReadSegment(const char *buf,
     mRequestBodyLenRemaining -= dataLength;
     GenerateDataFrameHeader(dataLength, !mRequestBodyLenRemaining);
     ChangeState(SENDING_REQUEST_BODY);
-    // NO BREAK
+    MOZ_FALLTHROUGH;
 
   case SENDING_REQUEST_BODY:
     MOZ_ASSERT(mTxInlineFrameUsed, "OnReadSegment Send Data Header 0b");
diff --git a/netwerk/protocol/http/nsHttpTransaction.cpp b/netwerk/protocol/http/nsHttpTransaction.cpp
index c83818b3f1..0f4198e9b8 100644
--- a/netwerk/protocol/http/nsHttpTransaction.cpp
+++ b/netwerk/protocol/http/nsHttpTransaction.cpp
@@ -1629,7 +1629,8 @@ nsHttpTransaction::HandleContentStart()
         // check if this is a no-content response
         switch (mResponseHead->Status()) {
         case 101:
-            mPreserveStream = true;    // fall through to other no content
+            mPreserveStream = true;
+            MOZ_FALLTHROUGH; // to other no content cases:
         case 204:
         case 205:
         case 304:
diff --git a/netwerk/protocol/websocket/WebSocketChannel.cpp b/netwerk/protocol/websocket/WebSocketChannel.cpp
index 2a9fe5347b..eb56608a82 100644
--- a/netwerk/protocol/websocket/WebSocketChannel.cpp
+++ b/netwerk/protocol/websocket/WebSocketChannel.cpp
@@ -2110,6 +2110,7 @@ WebSocketChannel::PrimeNewOutgoingMessage()
       msgType = kMsgTypeBinaryString;
 
       // no break: fall down into binary string case
+      MOZ_FALLTHROUGH;
 
     case kMsgTypeBinaryString:
       mOutHeader[0] = kFinalFragBit | nsIWebSocketFrame::OPCODE_BINARY;
diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
index 6b0ee38f43..62d011166d 100644
--- a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
@@ -53,6 +53,7 @@ mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
         break;
       }
       // else fall through
+      MOZ_FALLTHROUGH;
     default:
       aStringToAppendTo += ch;
     }
@@ -100,6 +101,7 @@ mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
         break;
       }
       // else fall through
+      MOZ_FALLTHROUGH;
     default:
       i++;
     }
@@ -511,7 +513,7 @@ mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const u
   {
   case '@':
     state[RFC2396E] = unchecked;
-    // no break here
+    MOZ_FALLTHROUGH;
   case '.':
     state[abbreviated] = unchecked;
     break;
diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
index d3f92ad745..e4a9e1e6da 100644
--- a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
+++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
@@ -126,7 +126,6 @@ nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsISupports *aContext,
     status = NS_ERROR_NET_PARTIAL_TRANSFER;
   }
   if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) {
-    uint32_t waste;
     nsCOMPtr<nsIForcePendingChannel> fpChannel = do_QueryInterface(request);
     bool isPending = false;
     if (request) {
@@ -135,7 +134,9 @@ nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsISupports *aContext,
     if (fpChannel && !isPending) {
       fpChannel->ForcePending(true);
     }
-    status = BrotliHandler(nullptr, this, nullptr, 0, 0, &waste);
+    if (mBrotli->mTotalOut == 0 && !BrotliDecoderIsFinished(&mBrotli->mState)) {
+      status = NS_ERROR_INVALID_CONTENT_ENCODING;
+    }
     if (fpChannel && !isPending) {
       fpChannel->ForcePending(false);
     }
@@ -149,6 +150,7 @@ NS_METHOD
 nsHTTPCompressConv::BrotliHandler(nsIInputStream *stream, void *closure, const char *dataIn,
                                   uint32_t, uint32_t aAvail, uint32_t *countRead)
 {
+  MOZ_ASSERT(stream);
   nsHTTPCompressConv *self = static_cast<nsHTTPCompressConv *>(closure);
   *countRead = 0;
 
@@ -255,7 +257,7 @@ nsHTTPCompressConv::OnDataAvailable(nsIRequest* request,
       return NS_OK;
     }
 
-    // FALLTHROUGH
+    MOZ_FALLTHROUGH;
 
   case HTTP_COMPRESS_DEFLATE:
 
diff --git a/release/docker/beet-mover/Dockerfile b/release/docker/beet-mover/Dockerfile
new file mode 100644
index 0000000000..f109c0e247
--- /dev/null
+++ b/release/docker/beet-mover/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:vivid
+
+RUN apt-get -q update \
+    && apt-get install --yes -q \
+    mercurial \
+    python-dev \
+    python-pip \
+    python-virtualenv \
+    libffi-dev \
+    libssl-dev \
+    libyaml-dev \
+    libmysqlclient-dev \
+    clamav \
+    clamav-freshclam \
+    curl \
+    wget \
+    && apt-get clean
+
+RUN freshclam --verbose
diff --git a/security/manager/ssl/nsCertOverrideService.cpp b/security/manager/ssl/nsCertOverrideService.cpp
index 80a6bb7881..f5dbb86c2d 100644
--- a/security/manager/ssl/nsCertOverrideService.cpp
+++ b/security/manager/ssl/nsCertOverrideService.cpp
@@ -396,19 +396,10 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName,
   if (NS_FAILED(rv))
     return rv;
 
-  char *dbkey = nullptr;
-  rv = aCert->GetDbKey(&dbkey);
-  if (NS_FAILED(rv) || !dbkey)
+  nsAutoCString dbkey;
+  rv = aCert->GetDbKey(dbkey);
+  if (NS_FAILED(rv)) {
     return rv;
-
-  // change \n and \r to spaces in the possibly multi-line-base64-encoded key
-  for (char *dbkey_walk = dbkey;
-       *dbkey_walk;
-      ++dbkey_walk) {
-    char c = *dbkey_walk;
-    if (c == '\r' || c == '\n') {
-      *dbkey_walk = ' ';
-    }
   }
 
   {
@@ -419,11 +410,10 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName,
                    aTemporary, 
                    mDottedOidForStoringNewHashes, fpStr, 
                    (nsCertOverride::OverrideBits)aOverrideBits, 
-                   nsDependentCString(dbkey));
+                   dbkey);
     Write();
   }
 
-  PR_Free(dbkey);
   return NS_OK;
 }
 
@@ -551,6 +541,8 @@ nsCertOverrideService::AddEntryToList(const nsACString &aHostName, int32_t aPort
     settings.mFingerprint = fingerprint;
     settings.mOverrideBits = ob;
     settings.mDBKey = dbKey;
+    // remove whitespace from stored dbKey for backwards compatibility
+    settings.mDBKey.StripWhitespace();
     settings.mCert = aCert;
   }
 
@@ -583,51 +575,14 @@ nsCertOverrideService::ClearValidityOverride(const nsACString & aHostName, int32
 }
 
 static bool
-matchesDBKey(nsIX509Cert *cert, const char *match_dbkey)
+matchesDBKey(nsIX509Cert* cert, const nsCString& matchDbKey)
 {
-  char *dbkey = nullptr;
-  nsresult rv = cert->GetDbKey(&dbkey);
-  if (NS_FAILED(rv) || !dbkey)
+  nsAutoCString dbKey;
+  nsresult rv = cert->GetDbKey(dbKey);
+  if (NS_FAILED(rv)) {
     return false;
-
-  bool found_mismatch = false;
-  const char *key1 = dbkey;
-  const char *key2 = match_dbkey;
-
-  // skip over any whitespace when comparing
-  while (*key1 && *key2) {
-    char c1 = *key1;
-    char c2 = *key2;
-    
-    switch (c1) {
-      case ' ':
-      case '\t':
-      case '\n':
-      case '\r':
-        ++key1;
-        continue;
-    }
-
-    switch (c2) {
-      case ' ':
-      case '\t':
-      case '\n':
-      case '\r':
-        ++key2;
-        continue;
-    }
-
-    if (c1 != c2) {
-      found_mismatch = true;
-      break;
-    }
-
-    ++key1;
-    ++key2;
   }
-
-  PR_Free(dbkey);
-  return !found_mismatch;
+  return dbKey.Equals(matchDbKey);
 }
 
 NS_IMETHODIMP
@@ -651,7 +606,7 @@ nsCertOverrideService::IsCertUsedForOverrides(nsIX509Cert *aCert,
         still_ok = false;
       }
 
-      if (still_ok && matchesDBKey(aCert, settings.mDBKey.get())) {
+      if (still_ok && matchesDBKey(aCert, settings.mDBKey)) {
         nsAutoCString cert_fingerprint;
         nsresult rv = NS_ERROR_UNEXPECTED;
         if (settings.mFingerprintAlgOID.Equals(mDottedOidForStoringNewHashes)) {
@@ -681,7 +636,7 @@ nsCertOverrideService::EnumerateCertOverrides(nsIX509Cert *aCert,
     if (!aCert) {
       aEnumerator(settings, aUserData);
     } else {
-      if (matchesDBKey(aCert, settings.mDBKey.get())) {
+      if (matchesDBKey(aCert, settings.mDBKey)) {
         nsAutoCString cert_fingerprint;
         nsresult rv = NS_ERROR_UNEXPECTED;
         if (settings.mFingerprintAlgOID.Equals(mDottedOidForStoringNewHashes)) {
diff --git a/security/manager/ssl/nsClientAuthRemember.cpp b/security/manager/ssl/nsClientAuthRemember.cpp
index e955ae291e..9e165740ea 100644
--- a/security/manager/ssl/nsClientAuthRemember.cpp
+++ b/security/manager/ssl/nsClientAuthRemember.cpp
@@ -102,29 +102,26 @@ nsClientAuthRememberService::RememberDecision(const nsACString & aHostName,
 {
   // aClientCert == nullptr means: remember that user does not want to use a cert
   NS_ENSURE_ARG_POINTER(aServerCert);
-  if (aHostName.IsEmpty())
+  if (aHostName.IsEmpty()) {
     return NS_ERROR_INVALID_ARG;
+  }
 
   nsAutoCString fpStr;
   nsresult rv = GetCertFingerprintByOidTag(aServerCert, SEC_OID_SHA256, fpStr);
-  if (NS_FAILED(rv))
+  if (NS_FAILED(rv)) {
     return rv;
+  }
 
   {
     ReentrantMonitorAutoEnter lock(monitor);
     if (aClientCert) {
       RefPtr<nsNSSCertificate> pipCert(new nsNSSCertificate(aClientCert));
-      char *dbkey = nullptr;
-      rv = pipCert->GetDbKey(&dbkey);
-      if (NS_SUCCEEDED(rv) && dbkey) {
-        AddEntryToList(aHostName, fpStr, 
-                       nsDependentCString(dbkey));
+      nsAutoCString dbkey;
+      rv = pipCert->GetDbKey(dbkey);
+      if (NS_SUCCEEDED(rv)) {
+        AddEntryToList(aHostName, fpStr, dbkey);
       }
-      if (dbkey) {
-        PORT_Free(dbkey);
-      }
-    }
-    else {
+    } else {
       nsCString empty;
       AddEntryToList(aHostName, fpStr, empty);
     }
diff --git a/security/manager/ssl/nsCryptoHash.cpp b/security/manager/ssl/nsCryptoHash.cpp
index de2a4e3ef0..b917a2d785 100644
--- a/security/manager/ssl/nsCryptoHash.cpp
+++ b/security/manager/ssl/nsCryptoHash.cpp
@@ -301,7 +301,7 @@ nsCryptoHMAC::Init(uint32_t aAlgorithm, nsIKeyObject *aKeyObject)
 
   PK11SymKey* key;
   // GetKeyObj doesn't addref the key
-  rv = aKeyObject->GetKeyObj((void**)&key);
+  rv = aKeyObject->GetKeyObj(&key);
   NS_ENSURE_SUCCESS(rv, rv);
 
   SECItem rawData;
diff --git a/security/manager/ssl/nsIKeyModule.idl b/security/manager/ssl/nsIKeyModule.idl
index 63a775140f..d9e1d11388 100644
--- a/security/manager/ssl/nsIKeyModule.idl
+++ b/security/manager/ssl/nsIKeyModule.idl
@@ -4,46 +4,34 @@
 
 #include "nsISupports.idl"
 
+%{ C++
+ /* forward declaration */
+ typedef struct PK11SymKeyStr PK11SymKey;
+%}
+[ptr] native PK11SymKeyPtr(PK11SymKey);
+
 // An opaque key object.
-[scriptable, uuid(4b31f4ed-9424-4710-b946-79b7e33cf3a8)]
+[scriptable, uuid(ee2dc516-ba7b-4e77-89fe-c43b886ef715)]
 interface nsIKeyObject : nsISupports
 {
   // Key types
   const short SYM_KEY = 1;
-  const short PRIVATE_KEY = 2;
-  const short PUBLIC_KEY = 3;
 
   // Algorithm types
-  const short RC4 = 1;
-  const short AES_CBC = 2;
   const short HMAC = 257;
 
-  // aAlgorithm is an algorithm type
-  // aKey is either a PK11SymKey, SECKEYPublicKey, or a SECKEYPrivateKey.
   // The nsIKeyObject will take ownership of the key and be responsible
   // for freeing the key memory when destroyed.
-  [noscript] void initKey(in short aAlgorithm, in voidPtr aKey);
+  [noscript] void initKey(in short aAlgorithm, in PK11SymKeyPtr aKey);
 
-  // Return a pointer to the underlying key object
-  [noscript] voidPtr getKeyObj();
+  // Returns a pointer to the underlying key object.
+  [noscript] PK11SymKeyPtr getKeyObj();
 
-  // Will return NS_ERROR_NOT_INITIALIZED if initKey hasn't been run
   short getType();
 };
 
-[scriptable, uuid(264eb54d-e20d-49a0-890c-1a5986ea81c4)]
+[scriptable, uuid(838bdbf1-8244-448f-8bcd-cede70227d75)]
 interface nsIKeyObjectFactory : nsISupports
 {
-  nsIKeyObject lookupKeyByName(in ACString aName);
-
-  nsIKeyObject unwrapKey(in short aAlgorithm,
-                         [const, array, size_is(aWrappedKeyLen)] in octet aWrappedKey,
-                         in unsigned long aWrappedKeyLen);
-
-  // TODO: deriveKeyFrom*
-
-
-  // DO NOT USE
-  // This is not FIPS compliant and should not be used.
   nsIKeyObject keyFromString(in short aAlgorithm, in ACString aKey);
 };
diff --git a/security/manager/ssl/nsIX509Cert.idl b/security/manager/ssl/nsIX509Cert.idl
index 9e9dce79ff..c643e6c88e 100644
--- a/security/manager/ssl/nsIX509Cert.idl
+++ b/security/manager/ssl/nsIX509Cert.idl
@@ -20,7 +20,7 @@ interface nsICertVerificationListener;
 /**
  * This represents a X.509 certificate.
  */
-[scriptable, uuid(f8ed8364-ced9-4c6e-86ba-48af53c393e6)]
+[scriptable, uuid(bdc3979a-5422-4cd5-8589-696b6e96ea83)]
 interface nsIX509Cert : nsISupports {
 
   /**
@@ -130,7 +130,7 @@ interface nsIX509Cert : nsISupports {
   /**
    *  A unique identifier of this certificate within the local storage.
    */
-  readonly attribute string dbKey;
+  readonly attribute ACString dbKey;
 
   /**
    *  A human readable identifier to label this certificate.
diff --git a/security/manager/ssl/nsKeyModule.cpp b/security/manager/ssl/nsKeyModule.cpp
index 40cb36005e..f4e8c35ff0 100644
--- a/security/manager/ssl/nsKeyModule.cpp
+++ b/security/manager/ssl/nsKeyModule.cpp
@@ -2,11 +2,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include "nsComponentManagerUtils.h"
 #include "nsCOMPtr.h"
+#include "nsComponentManagerUtils.h"
 #include "nsKeyModule.h"
 #include "nsString.h"
-#include "ScopedNSSTypes.h"
 
 using namespace mozilla;
 using namespace mozilla::psm;
@@ -14,107 +13,80 @@ using namespace mozilla::psm;
 NS_IMPL_ISUPPORTS(nsKeyObject, nsIKeyObject)
 
 nsKeyObject::nsKeyObject()
-  : mKeyType(0), mSymKey(nullptr), mPrivateKey(nullptr),
-    mPublicKey(nullptr)
+  : mSymKey(nullptr)
 {
 }
 
 nsKeyObject::~nsKeyObject()
 {
-  CleanUp();
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return;
+  }
+  destructorSafeDestroyNSSReference();
+  shutdown(calledFromObject);
 }
 
 void
-nsKeyObject::CleanUp()
+nsKeyObject::virtualDestroyNSSReference()
 {
-  switch (mKeyType) {
-    case nsIKeyObject::SYM_KEY:
-      PK11_FreeSymKey(mSymKey);
-      break;
-    
-    case nsIKeyObject::PRIVATE_KEY:
-      PK11_DeleteTokenPrivateKey(mPrivateKey, true /* force */);
-      break;
+  destructorSafeDestroyNSSReference();
+}
 
-    case nsIKeyObject::PUBLIC_KEY:
-      PK11_DeleteTokenPublicKey(mPublicKey);
-      break;
-    
-    default:
-      // probably not initialized, do nothing
-      break;
-  }
-  mKeyType = 0;
+void
+nsKeyObject::destructorSafeDestroyNSSReference()
+{
+  mSymKey = nullptr;
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // nsIKeyObject
 
 NS_IMETHODIMP
-nsKeyObject::InitKey(int16_t aAlgorithm, void * aKey)
+nsKeyObject::InitKey(int16_t aAlgorithm, PK11SymKey* aKey)
 {
-  // Clear previous key data if it exists
-  CleanUp();
-
-  switch (aAlgorithm) {
-    case nsIKeyObject::RC4:
-    case nsIKeyObject::HMAC:
-      mSymKey = reinterpret_cast<PK11SymKey*>(aKey);
-
-      if (!mSymKey) {
-        NS_ERROR("no symkey");
-        break;
-      }
-      mKeyType = nsIKeyObject::SYM_KEY;
-      break;
-
-    case nsIKeyObject::AES_CBC:
-      return NS_ERROR_NOT_IMPLEMENTED;
-
-    default:
-      return NS_ERROR_INVALID_ARG;
+  if (!aKey || aAlgorithm != nsIKeyObject::HMAC) {
+    return NS_ERROR_INVALID_ARG;
   }
 
-  // One of these should have been created
-  if (!mSymKey && !mPrivateKey && !mPublicKey)
-    return NS_ERROR_FAILURE;
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
 
+  mSymKey = aKey;
   return NS_OK;
 }
 
 NS_IMETHODIMP
-nsKeyObject::GetKeyObj(void * *_retval)
+nsKeyObject::GetKeyObj(PK11SymKey** _retval)
 {
-  if (mKeyType == 0)
-    return NS_ERROR_NOT_INITIALIZED;
-
-  switch (mKeyType) {
-    case nsIKeyObject::SYM_KEY:
-      *_retval = (void*)mSymKey;
-      break;
-
-    case nsIKeyObject::PRIVATE_KEY:
-      *_retval = (void*)mPublicKey;
-      break;
-
-    case nsIKeyObject::PUBLIC_KEY:
-      *_retval = (void*)mPrivateKey;
-      break;
-
-    default:
-      // unknown key type?  How did that happen?
-      return NS_ERROR_FAILURE;
+  if (!_retval) {
+    return NS_ERROR_INVALID_ARG;
   }
+
+  *_retval = nullptr;
+
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  if (!mSymKey) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+
+  *_retval = mSymKey;
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsKeyObject::GetType(int16_t *_retval)
 {
-  if (mKeyType == 0)
-    return NS_ERROR_NOT_INITIALIZED;
-
-  *_retval = mKeyType;
+  if (!_retval) {
+    return NS_ERROR_INVALID_ARG;
+  }
+  *_retval = nsIKeyObject::SYM_KEY;
   return NS_OK;
 }
 
@@ -127,46 +99,37 @@ nsKeyObjectFactory::nsKeyObjectFactory()
 {
 }
 
-NS_IMETHODIMP
-nsKeyObjectFactory::LookupKeyByName(const nsACString & aName,
-                                    nsIKeyObject **_retval)
+nsKeyObjectFactory::~nsKeyObjectFactory()
 {
-  return NS_ERROR_NOT_IMPLEMENTED;
-}
- 
-NS_IMETHODIMP
-nsKeyObjectFactory::UnwrapKey(int16_t aAlgorithm, const uint8_t *aWrappedKey,
-                              uint32_t aWrappedKeyLen, nsIKeyObject **_retval)
-{
-  return NS_ERROR_NOT_IMPLEMENTED;
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return;
+  }
+  shutdown(calledFromObject);
 }
 
 NS_IMETHODIMP
-nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString & aKey,
-                                  nsIKeyObject **_retval)
+nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString& aKey,
+                                  nsIKeyObject** _retval)
 {
-  CK_MECHANISM_TYPE cipherMech;
-  CK_ATTRIBUTE_TYPE cipherOperation;
-  switch (aAlgorithm)
-  {
-  case nsIKeyObject::HMAC:
-    cipherMech = CKM_GENERIC_SECRET_KEY_GEN;
-    cipherOperation = CKA_SIGN;
-    break;
-
-  case nsIKeyObject::RC4:
-    cipherMech = CKM_RC4;
-    cipherOperation = CKA_ENCRYPT;
-    break;
-
-  default:
+  if (!_retval || aAlgorithm != nsIKeyObject::HMAC) {
     return NS_ERROR_INVALID_ARG;
   }
 
+  nsNSSShutDownPreventionLock locker;
+  if (isAlreadyShutDown()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  CK_MECHANISM_TYPE cipherMech = CKM_GENERIC_SECRET_KEY_GEN;
+  CK_ATTRIBUTE_TYPE cipherOperation = CKA_SIGN;
+
   nsresult rv;
-  nsCOMPtr<nsIKeyObject> key =
-      do_CreateInstance(NS_KEYMODULEOBJECT_CONTRACTID, &rv);
-  NS_ENSURE_SUCCESS(rv, rv);
+  nsCOMPtr<nsIKeyObject> key(
+    do_CreateInstance(NS_KEYMODULEOBJECT_CONTRACTID, &rv));
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
 
   // Convert the raw string into a SECItem
   const nsCString& flatKey = PromiseFlatCString(aKey);
@@ -176,18 +139,20 @@ nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString & aKey,
 
   ScopedPK11SlotInfo slot(PK11_GetBestSlot(cipherMech, nullptr));
   if (!slot) {
-    NS_ERROR("no slot");
     return NS_ERROR_FAILURE;
   }
 
-  PK11SymKey* symKey = PK11_ImportSymKey(slot, cipherMech, PK11_OriginUnwrap,
-                                         cipherOperation, &keyItem, nullptr);
+  ScopedPK11SymKey symKey(PK11_ImportSymKey(slot, cipherMech,
+                                            PK11_OriginUnwrap, cipherOperation,
+                                            &keyItem, nullptr));
   if (!symKey) {
     return NS_ERROR_FAILURE;
   }
-  
-  rv = key->InitKey(aAlgorithm, (void*)symKey);
-  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = key->InitKey(aAlgorithm, symKey.forget());
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
 
   key.swap(*_retval);
   return NS_OK;
diff --git a/security/manager/ssl/nsKeyModule.h b/security/manager/ssl/nsKeyModule.h
index b1e6c2d38a..7c844be073 100644
--- a/security/manager/ssl/nsKeyModule.h
+++ b/security/manager/ssl/nsKeyModule.h
@@ -2,25 +2,25 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#ifndef _NS_KEYMODULE_H_
-#define _NS_KEYMODULE_H_
+#ifndef nsKeyModule_h
+#define nsKeyModule_h
 
+#include "ScopedNSSTypes.h"
 #include "nsIKeyModule.h"
+#include "nsNSSShutDown.h"
 #include "pk11pub.h"
-#include "mozilla/Attributes.h"
 
-/* eae599aa-ecef-49c6-a8af-6ddcc6feb484 */
 #define NS_KEYMODULEOBJECT_CID   \
-{ 0xeae599aa, 0xecef, 0x49c6, {0xa8, 0xaf, 0x6d, 0xdc, 0xc6, 0xfe, 0xb4, 0x84} }
+{ 0x9d383ddd, 0x6856, 0x4187, {0x84, 0x85, 0xf3, 0x61, 0x95, 0xb2, 0x9a, 0x0e} }
 #define NS_KEYMODULEOBJECT_CONTRACTID "@mozilla.org/security/keyobject;1"
 
-/* a39e0e9d-e567-41e3-b12c-5df67f18174d */
 #define NS_KEYMODULEOBJECTFACTORY_CID   \
-{ 0xa39e0e9d, 0xe567, 0x41e3, {0xb1, 0x2c, 0x5d, 0xf6, 0x7f, 0x18, 0x17, 0x4d} }
+{ 0x2a35dd47, 0xb026, 0x4e8d, {0xb6, 0xb7, 0x57, 0x40, 0xf6, 0x1a, 0xb9, 0x02} }
 #define NS_KEYMODULEOBJECTFACTORY_CONTRACTID \
 "@mozilla.org/security/keyobjectfactory;1"
 
 class nsKeyObject final : public nsIKeyObject
+                        , public nsNSSShutDownObject
 {
 public:
   nsKeyObject();
@@ -30,24 +30,19 @@ public:
 
 private:
   ~nsKeyObject();
-  
+
   // Disallow copy constructor
   nsKeyObject(nsKeyObject&);
 
-  // 0 if not yet set, otherwise one of the nsIKeyObject::*KEY values
-  uint32_t mKeyType;
-  
-  // A union of our possible key types
-  PK11SymKey* mSymKey;
-  SECKEYPrivateKey* mPrivateKey;
-  SECKEYPublicKey* mPublicKey;
+  ScopedPK11SymKey mSymKey;
 
-  // Helper method to free memory used by keys.
-  void CleanUp();
+  virtual void virtualDestroyNSSReference() override;
+  void destructorSafeDestroyNSSReference();
 };
 
 
 class nsKeyObjectFactory final : public nsIKeyObjectFactory
+                               , public nsNSSShutDownObject
 {
 public:
   nsKeyObjectFactory();
@@ -56,10 +51,13 @@ public:
   NS_DECL_NSIKEYOBJECTFACTORY
 
 private:
-  ~nsKeyObjectFactory() {}
+  ~nsKeyObjectFactory();
 
   // Disallow copy constructor
   nsKeyObjectFactory(nsKeyObjectFactory&);
+
+  // No NSS resources to release.
+  virtual void virtualDestroyNSSReference() override {}
 };
 
-#endif // _NS_KEYMODULE_H_
+#endif // nsKeyModule_h
diff --git a/security/manager/ssl/nsNSSCertificate.cpp b/security/manager/ssl/nsNSSCertificate.cpp
index 76a9836f75..17daac89ab 100644
--- a/security/manager/ssl/nsNSSCertificate.cpp
+++ b/security/manager/ssl/nsNSSCertificate.cpp
@@ -50,6 +50,10 @@
 #include "ssl.h"
 #include "plbase64.h"
 
+#ifdef XP_WIN
+#include <winsock.h> // for htonl
+#endif
+
 using namespace mozilla;
 using namespace mozilla::psm;
 
@@ -486,32 +490,37 @@ nsNSSCertificate::FormatUIStrings(const nsAutoString& nickname,
 }
 
 NS_IMETHODIMP
-nsNSSCertificate::GetDbKey(char** aDbKey)
+nsNSSCertificate::GetDbKey(nsACString& aDbKey)
 {
   nsNSSShutDownPreventionLock locker;
-  if (isAlreadyShutDown())
+  if (isAlreadyShutDown()) {
     return NS_ERROR_NOT_AVAILABLE;
+  }
 
-  SECItem key;
+  static_assert(sizeof(uint64_t) == 8, "type size sanity check");
+  static_assert(sizeof(uint32_t) == 4, "type size sanity check");
+  // The format of the key is the base64 encoding of the following:
+  // 4 bytes: {0, 0, 0, 0} (this was intended to be the module ID, but it was
+  //                        never implemented)
+  // 4 bytes: {0, 0, 0, 0} (this was intended to be the slot ID, but it was
+  //                        never implemented)
+  // 4 bytes: <serial number length in big-endian order>
+  // 4 bytes: <DER-encoded issuer distinguished name length in big-endian order>
+  // n bytes: <bytes of serial number>
+  // m bytes: <DER-encoded issuer distinguished name>
+  nsAutoCString buf;
+  const char leadingZeroes[] = {0, 0, 0, 0, 0, 0, 0, 0};
+  buf.Append(leadingZeroes, sizeof(leadingZeroes));
+  uint32_t serialNumberLen = htonl(mCert->serialNumber.len);
+  buf.Append(reinterpret_cast<const char*>(&serialNumberLen), sizeof(uint32_t));
+  uint32_t issuerLen = htonl(mCert->derIssuer.len);
+  buf.Append(reinterpret_cast<const char*>(&issuerLen), sizeof(uint32_t));
+  buf.Append(reinterpret_cast<const char*>(mCert->serialNumber.data),
+             mCert->serialNumber.len);
+  buf.Append(reinterpret_cast<const char*>(mCert->derIssuer.data),
+             mCert->derIssuer.len);
 
-  NS_ENSURE_ARG(aDbKey);
-  *aDbKey = nullptr;
-  key.len = NS_NSS_LONG*4+mCert->serialNumber.len+mCert->derIssuer.len;
-  key.data = (unsigned char*) moz_xmalloc(key.len);
-  if (!key.data)
-    return NS_ERROR_OUT_OF_MEMORY;
-  NS_NSS_PUT_LONG(0,key.data); // later put moduleID
-  NS_NSS_PUT_LONG(0,&key.data[NS_NSS_LONG]); // later put slotID
-  NS_NSS_PUT_LONG(mCert->serialNumber.len,&key.data[NS_NSS_LONG*2]);
-  NS_NSS_PUT_LONG(mCert->derIssuer.len,&key.data[NS_NSS_LONG*3]);
-  memcpy(&key.data[NS_NSS_LONG*4], mCert->serialNumber.data,
-         mCert->serialNumber.len);
-  memcpy(&key.data[NS_NSS_LONG*4+mCert->serialNumber.len],
-         mCert->derIssuer.data, mCert->derIssuer.len);
-
-  *aDbKey = NSSBase64_EncodeItem(nullptr, nullptr, 0, &key);
-  free(key.data); // SECItem is a 'c' type without a destructor
-  return (*aDbKey) ? NS_OK : NS_ERROR_FAILURE;
+  return Base64Encode(buf, aDbKey);
 }
 
 NS_IMETHODIMP
diff --git a/security/manager/ssl/nsNSSCertificate.h b/security/manager/ssl/nsNSSCertificate.h
index ba8f29e302..621df9a3fb 100644
--- a/security/manager/ssl/nsNSSCertificate.h
+++ b/security/manager/ssl/nsNSSCertificate.h
@@ -132,17 +132,6 @@ private:
    void operator=(const nsNSSCertListEnumerator&) = delete;
 };
 
-
-#define NS_NSS_LONG 4
-#define NS_NSS_GET_LONG(x) ((((unsigned long)((x)[0])) << 24) | \
-                            (((unsigned long)((x)[1])) << 16) | \
-                            (((unsigned long)((x)[2])) <<  8) | \
-                             ((unsigned long)((x)[3])) )
-#define NS_NSS_PUT_LONG(src,dest) (dest)[0] = (((src) >> 24) & 0xff); \
-                                  (dest)[1] = (((src) >> 16) & 0xff); \
-                                  (dest)[2] = (((src) >>  8) & 0xff); \
-                                  (dest)[3] = ((src) & 0xff);
-
 #define NS_X509CERT_CID { /* 660a3226-915c-4ffb-bb20-8985a632df05 */   \
     0x660a3226,                                                        \
     0x915c,                                                            \
diff --git a/security/manager/ssl/nsNSSCertificateDB.cpp b/security/manager/ssl/nsNSSCertificateDB.cpp
index 4234f59791..afab571cde 100644
--- a/security/manager/ssl/nsNSSCertificateDB.cpp
+++ b/security/manager/ssl/nsNSSCertificateDB.cpp
@@ -48,6 +48,10 @@
 #include "ssl.h"
 #include "plbase64.h"
 
+#ifdef XP_WIN
+#include <winsock.h> // for ntohl
+#endif
+
 using namespace mozilla;
 using namespace mozilla::psm;
 using mozilla::psm::SharedSSLState;
@@ -137,42 +141,58 @@ nsNSSCertificateDB::FindCertByDBKey(const char *aDBkey, nsISupports *aToken,
     return NS_ERROR_NOT_AVAILABLE;
   }
 
-  SECItem keyItem = {siBuffer, nullptr, 0};
-  SECItem *dummy;
+  static_assert(sizeof(uint64_t) == 8, "type size sanity check");
+  static_assert(sizeof(uint32_t) == 4, "type size sanity check");
+  // (From nsNSSCertificate::GetDbKey)
+  // The format of the key is the base64 encoding of the following:
+  // 4 bytes: {0, 0, 0, 0} (this was intended to be the module ID, but it was
+  //                        never implemented)
+  // 4 bytes: {0, 0, 0, 0} (this was intended to be the slot ID, but it was
+  //                        never implemented)
+  // 4 bytes: <serial number length in big-endian order>
+  // 4 bytes: <DER-encoded issuer distinguished name length in big-endian order>
+  // n bytes: <bytes of serial number>
+  // m bytes: <DER-encoded issuer distinguished name>
+  nsAutoCString decoded;
+  nsAutoCString tmpDBKey(aDBkey);
+  // Filter out any whitespace for backwards compatibility.
+  tmpDBKey.StripWhitespace();
+  nsresult rv = Base64Decode(tmpDBKey, decoded);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  if (decoded.Length() < 16) {
+    return NS_ERROR_ILLEGAL_INPUT;
+  }
+  const char* reader = decoded.BeginReading();
+  uint64_t zeroes = *reinterpret_cast<const uint64_t*>(reader);
+  if (zeroes != 0) {
+    return NS_ERROR_ILLEGAL_INPUT;
+  }
+  reader += sizeof(uint64_t);
+  uint32_t serialNumberLen = ntohl(*reinterpret_cast<const uint32_t*>(reader));
+  reader += sizeof(uint32_t);
+  uint32_t issuerLen = ntohl(*reinterpret_cast<const uint32_t*>(reader));
+  reader += sizeof(uint32_t);
+  if (decoded.Length() != 16ULL + serialNumberLen + issuerLen) {
+    return NS_ERROR_ILLEGAL_INPUT;
+  }
   CERTIssuerAndSN issuerSN;
-  //unsigned long moduleID,slotID;
+  issuerSN.serialNumber.len = serialNumberLen;
+  issuerSN.serialNumber.data = (unsigned char*)reader;
+  reader += serialNumberLen;
+  issuerSN.derIssuer.len = issuerLen;
+  issuerSN.derIssuer.data = (unsigned char*)reader;
+  reader += issuerLen;
+  MOZ_ASSERT(reader == decoded.EndReading());
 
-  dummy = NSSBase64_DecodeBuffer(nullptr, &keyItem, aDBkey,
-                                 (uint32_t)strlen(aDBkey)); 
-  if (!dummy || keyItem.len < NS_NSS_LONG*4) {
-    PR_FREEIF(keyItem.data);
-    return NS_ERROR_INVALID_ARG;
-  }
-
-  ScopedCERTCertificate cert;
-  // someday maybe we can speed up the search using the moduleID and slotID
-  // moduleID = NS_NSS_GET_LONG(keyItem.data);
-  // slotID = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG]);
-
-  // build the issuer/SN structure
-  issuerSN.serialNumber.len = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG*2]);
-  issuerSN.derIssuer.len = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG*3]);
-  if (issuerSN.serialNumber.len == 0 || issuerSN.derIssuer.len == 0
-      || issuerSN.serialNumber.len + issuerSN.derIssuer.len
-         != keyItem.len - NS_NSS_LONG*4) {
-    PR_FREEIF(keyItem.data);
-    return NS_ERROR_INVALID_ARG;
-  }
-  issuerSN.serialNumber.data= &keyItem.data[NS_NSS_LONG*4];
-  issuerSN.derIssuer.data= &keyItem.data[NS_NSS_LONG*4+
-                                              issuerSN.serialNumber.len];
-
-  cert = CERT_FindCertByIssuerAndSN(CERT_GetDefaultCertDB(), &issuerSN);
-  PR_FREEIF(keyItem.data);
+  ScopedCERTCertificate cert(
+    CERT_FindCertByIssuerAndSN(CERT_GetDefaultCertDB(), &issuerSN));
   if (cert) {
     nsCOMPtr<nsIX509Cert> nssCert = nsNSSCertificate::Create(cert.get());
-    if (!nssCert)
+    if (!nssCert) {
       return NS_ERROR_OUT_OF_MEMORY;
+    }
     nssCert.forget(_cert);
   }
   return NS_OK;
@@ -1221,12 +1241,10 @@ nsNSSCertificateDB::getCertNames(CERTCertList *certList,
        node = CERT_LIST_NEXT(node)) {
     if (getCertType(node->cert) == type) {
       RefPtr<nsNSSCertificate> pipCert(new nsNSSCertificate(node->cert));
-      char *dbkey = nullptr;
-      char *namestr = nullptr;
-      nsAutoString certstr;
-      pipCert->GetDbKey(&dbkey);
+      nsAutoCString dbkey;
+      pipCert->GetDbKey(dbkey);
       nsAutoString keystr = NS_ConvertASCIItoUTF16(dbkey);
-      PR_FREEIF(dbkey);
+      char *namestr = nullptr;
       if (type == nsIX509Cert::EMAIL_CERT) {
         namestr = node->cert->emailAddr;
       } else {
@@ -1237,6 +1255,7 @@ nsNSSCertificateDB::getCertNames(CERTCertList *certList,
         }
       }
       nsAutoString certname = NS_ConvertASCIItoUTF16(namestr ? namestr : "");
+      nsAutoString certstr;
       certstr.Append(char16_t(DELIM));
       certstr += certname;
       certstr.Append(char16_t(DELIM));
diff --git a/security/manager/ssl/nsNSSCertificateFakeTransport.cpp b/security/manager/ssl/nsNSSCertificateFakeTransport.cpp
index 3b058202dc..766f98933f 100644
--- a/security/manager/ssl/nsNSSCertificateFakeTransport.cpp
+++ b/security/manager/ssl/nsNSSCertificateFakeTransport.cpp
@@ -31,7 +31,7 @@ nsNSSCertificateFakeTransport::~nsNSSCertificateFakeTransport()
 }
 
 NS_IMETHODIMP
-nsNSSCertificateFakeTransport::GetDbKey(char**)
+nsNSSCertificateFakeTransport::GetDbKey(nsACString&)
 {
   NS_NOTREACHED("Unimplemented on content process");
   return NS_ERROR_NOT_IMPLEMENTED;
diff --git a/security/manager/ssl/nsNSSShutDown.h b/security/manager/ssl/nsNSSShutDown.h
index 49a3498722..fa6a0add12 100644
--- a/security/manager/ssl/nsNSSShutDown.h
+++ b/security/manager/ssl/nsNSSShutDown.h
@@ -129,6 +129,22 @@ protected:
   the tracking list, to ensure no additional attempt to free the resources
   will be made.
 
+  ----------------------------------------------------------------------------
+  IMPORTANT NOTE REGARDING CLASSES THAT IMPLEMENT nsNSSShutDownObject BUT DO
+  NOT DIRECTLY HOLD NSS RESOURCES:
+  ----------------------------------------------------------------------------
+  Currently, classes that do not hold NSS resources but do call NSS functions
+  inherit from nsNSSShutDownObject (and use the lock/isAlreadyShutDown
+  mechanism) as a way of ensuring it is safe to call those functions. Because
+  these classes do not hold any resources, however, it is tempting to skip the
+  destructor component of this interface. This MUST NOT be done, because
+  if an object of such a class is destructed before the nsNSSShutDownList
+  processes all of its entries, this essentially causes a use-after-free when
+  nsNSSShutDownList reaches the entry that has been destroyed. The safe way to
+  do this is to implement the destructor as usual but omit the call to
+  destructorSafeDestroyNSSReference() as it is unnecessary and probably isn't
+  defined for that class.
+
   destructorSafeDestroyNSSReference() does not need to acquire an
   nsNSSShutDownPreventionLock or check isAlreadyShutDown() as long as it
   is only called by the destructor that has already acquired the lock and
diff --git a/security/manager/ssl/tests/unit/test_cert_dbKey.js b/security/manager/ssl/tests/unit/test_cert_dbKey.js
new file mode 100644
index 0000000000..742264e769
--- /dev/null
+++ b/security/manager/ssl/tests/unit/test_cert_dbKey.js
@@ -0,0 +1,152 @@
+// -*- Mode: javascript; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"use strict";
+
+// This test tests that the nsIX509Cert.dbKey and nsIX509CertDB.findCertByDBKey
+// APIs work as expected. That is, getting a certificate's dbKey and using it
+// in findCertByDBKey should return the same certificate. Also, for backwards
+// compatibility, findCertByDBKey should ignore any whitespace in its input
+// (even though now nsIX509Cert.dbKey will never have whitespace in it).
+
+function hexStringToBytes(hex) {
+  let bytes = [];
+  for (let hexByteStr of hex.split(":")) {
+    bytes.push(parseInt(hexByteStr, 16));
+  }
+  return bytes;
+}
+
+function encodeCommonNameAsBytes(commonName) {
+  // The encoding will look something like this (in hex):
+  // 30 (SEQUENCE) <length of contents>
+  //    31 (SET) <length of contents>
+  //       30 (SEQUENCE) <length of contents>
+  //          06 (OID) 03 (length)
+  //             55 04 03 (id-at-commonName)
+  //          0C (UTF8String) <length of common name>
+  //             <common name bytes>
+  // To make things simple, it would be nice to have the length of each
+  // component be less than 128 bytes (so we can have single-byte lengths).
+  // For this to hold, the maximum length of the contents of the outermost
+  // SEQUENCE must be 127. Everything not in the contents of the common name
+  // will take up 11 bytes, so the value of the common name itself can be at
+  // most 116 bytes.
+  ok(commonName.length <= 116,
+     "test assumption: common name can't be longer than 116 bytes (makes " +
+     "DER encoding easier)");
+  let commonNameOIDBytes = [ 0x06, 0x03, 0x55, 0x04, 0x03 ];
+  let commonNameBytes = [ 0x0C, commonName.length ];
+  for (let i = 0; i < commonName.length; i++) {
+    commonNameBytes.push(commonName.charCodeAt(i));
+  }
+  let bytes = commonNameOIDBytes.concat(commonNameBytes);
+  bytes.unshift(bytes.length);
+  bytes.unshift(0x30); // SEQUENCE
+  bytes.unshift(bytes.length);
+  bytes.unshift(0x31); // SET
+  bytes.unshift(bytes.length);
+  bytes.unshift(0x30); // SEQUENCE
+  return bytes;
+}
+
+function testInvalidDBKey(certDB, dbKey) {
+  let exceptionCaught = false;
+  try {
+    let cert = certDB.findCertByDBKey(dbKey, null);
+  } catch(e) {
+    do_print(e);
+    exceptionCaught = true;
+  }
+  ok(exceptionCaught, "should have thrown and caught an exception");
+}
+
+function testDBKeyForNonexistentCert(certDB, dbKey) {
+  let cert = certDB.findCertByDBKey(dbKey, null);
+  ok(!cert, "shouldn't find cert for given dbKey");
+}
+
+function byteArrayToByteString(bytes) {
+  let byteString = "";
+  for (let b of bytes) {
+    byteString += String.fromCharCode(b);
+  }
+  return byteString;
+}
+
+function run_test() {
+  do_get_profile();
+  let certDB = Cc["@mozilla.org/security/x509certdb;1"]
+                 .getService(Ci.nsIX509CertDB);
+  let cert = constructCertFromFile("bad_certs/test-ca.pem");
+  equal(cert.issuerName, "CN=" + cert.issuerCommonName,
+        "test assumption: this certificate's issuer distinguished name " +
+        "consists only of a common name");
+  let issuerBytes = encodeCommonNameAsBytes(cert.issuerCommonName);
+  ok(issuerBytes.length < 256,
+     "test assumption: length of encoded issuer is less than 256 bytes");
+  let serialNumberBytes = hexStringToBytes(cert.serialNumber);
+  ok(serialNumberBytes.length < 256,
+     "test assumption: length of encoded serial number is less than 256 bytes");
+  let dbKeyHeader = [ 0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, serialNumberBytes.length,
+                      0, 0, 0, issuerBytes.length ];
+  let expectedDbKeyBytes = dbKeyHeader.concat(serialNumberBytes, issuerBytes);
+  let expectedDbKey = btoa(byteArrayToByteString(expectedDbKeyBytes));
+  equal(cert.dbKey, expectedDbKey,
+        "actual and expected dbKey values should match");
+
+  let certFromDbKey = certDB.findCertByDBKey(expectedDbKey, null);
+  ok(certFromDbKey.equals(cert),
+     "nsIX509CertDB.findCertByDBKey should find the right certificate");
+
+  ok(expectedDbKey.length > 64,
+     "test assumption: dbKey should be longer than 64 characters");
+  let expectedDbKeyWithCRLF = expectedDbKey.replace(/(.{64})/, "$1\r\n");
+  ok(expectedDbKeyWithCRLF.indexOf("\r\n") == 64,
+     "test self-check: adding CRLF to dbKey should succeed");
+  certFromDbKey = certDB.findCertByDBKey(expectedDbKeyWithCRLF, null);
+  ok(certFromDbKey.equals(cert),
+     "nsIX509CertDB.findCertByDBKey should work with dbKey with CRLF");
+
+  let expectedDbKeyWithSpaces = expectedDbKey.replace(/(.{64})/, "$1  ");
+  ok(expectedDbKeyWithSpaces.indexOf("  ") == 64,
+     "test self-check: adding spaces to dbKey should succeed");
+  certFromDbKey = certDB.findCertByDBKey(expectedDbKeyWithSpaces, null);
+  ok(certFromDbKey.equals(cert),
+     "nsIX509CertDB.findCertByDBKey should work with dbKey with spaces");
+
+  // Test some invalid dbKey values.
+  testInvalidDBKey(certDB, "AAAA"); // Not long enough.
+  // No header.
+  testInvalidDBKey(certDB, btoa(byteArrayToByteString(
+    [ 0, 0, 0, serialNumberBytes.length,
+      0, 0, 0, issuerBytes.length ].concat(serialNumberBytes, issuerBytes))));
+  testInvalidDBKey(certDB, btoa(byteArrayToByteString(
+    [ 0, 0, 0, 0, 0, 0, 0, 0,
+      255, 255, 255, 255, // serial number length is way too long
+      255, 255, 255, 255, // issuer length is way too long
+      0, 0, 0, 0 ])));
+  // Truncated issuer.
+  testInvalidDBKey(certDB, btoa(byteArrayToByteString(
+    [ 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 1,
+      0, 0, 0, 10,
+      1,
+      1, 2, 3 ])));
+  // Issuer doesn't decode to valid common name.
+  testDBKeyForNonexistentCert(certDB, btoa(byteArrayToByteString(
+    [ 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 1,
+      0, 0, 0, 3,
+      1,
+      1, 2, 3 ])));
+
+  // zero-length serial number and issuer -> no such certificate
+  testDBKeyForNonexistentCert(certDB, btoa(byteArrayToByteString(
+    [ 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0 ])));
+}
diff --git a/security/manager/ssl/tests/unit/test_weak_crypto.js b/security/manager/ssl/tests/unit/test_weak_crypto.js
index d93fe7c621..ab3bfaca18 100644
--- a/security/manager/ssl/tests/unit/test_weak_crypto.js
+++ b/security/manager/ssl/tests/unit/test_weak_crypto.js
@@ -103,8 +103,6 @@ function storeCertOverride(port, cert) {
 }
 
 function startClient(port, expectedResult, options = {}) {
-  let SSL_ERROR_BASE = Ci.nsINSSErrorsService.NSS_SSL_ERROR_BASE;
-  let SSL_ERROR_BAD_CERT_ALERT = SSL_ERROR_BASE + 17;
   let transport =
     socketTransportService.createTransport(["ssl"], 1, "127.0.0.1", port, null);
   if (options.isPrivate) {
@@ -113,8 +111,7 @@ function startClient(port, expectedResult, options = {}) {
   let input;
   let output;
 
-  let inputDeferred = Promise.defer();
-  let outputDeferred = Promise.defer();
+  let deferred = Promise.defer();
 
   let handler = {
 
@@ -125,46 +122,48 @@ function startClient(port, expectedResult, options = {}) {
     },
 
     onInputStreamReady: function(input) {
-      let errorCode = Cr.NS_OK;
       try {
         let data = NetUtil.readInputStreamToString(input, input.available());
         equal(data, "HELLO", "Echoed data received");
         input.close();
         output.close();
+        deferred.resolve();
       } catch (e) {
-        errorCode = e.result;
-      }
-      try {
-        equal(errorCode, expectedResult,
-              "Actual and expected connection result should match");
-        inputDeferred.resolve();
-      } catch (e) {
-        inputDeferred.reject(e);
+        deferred.reject(e);
       }
     },
 
     onOutputStreamReady: function(output) {
       try {
-        output.write("HELLO", 5);
+        try {
+          output.write("HELLO", 5);
+        } catch (e) {
+          if (e.result == Cr.NS_BASE_STREAM_WOULD_BLOCK) {
+            output.asyncWait(handler, 0, 0, Services.tm.currentThread);
+            return;
+          }
+          equal(e.result, expectedResult,
+                "Actual and expected connection result should match");
+          output.close();
+          deferred.resolve();
+          return;
+        }
+        equal(Cr.NS_OK, expectedResult, "Connection should succeed");
         do_print("Output to server written");
-        outputDeferred.resolve();
         input = transport.openInputStream(0, 0, 0);
         input.asyncWait(handler, 0, 0, Services.tm.currentThread);
       } catch (e) {
-        let errorCode = -1 * (e.result & 0xFFFF);
-        if (errorCode == SSL_ERROR_BAD_CERT_ALERT) {
-          do_print("Server doesn't like client cert");
-        }
-        outputDeferred.reject(e);
+        deferred.reject(e);
       }
     }
 
   };
 
   transport.setEventSink(handler, Services.tm.currentThread);
-  output = transport.openOutputStream(0, 0, 0);
+  output = transport.openOutputStream(Ci.nsITransport.OPEN_UNBUFFERED, 0, 0);
+  output.QueryInterface(Ci.nsIAsyncOutputStream);
 
-  return Promise.all([inputDeferred.promise, outputDeferred.promise]);
+  return deferred.promise;
 }
 
 function run_test() {
diff --git a/security/manager/ssl/tests/unit/xpcshell.ini b/security/manager/ssl/tests/unit/xpcshell.ini
index 2771027048..66e953226f 100644
--- a/security/manager/ssl/tests/unit/xpcshell.ini
+++ b/security/manager/ssl/tests/unit/xpcshell.ini
@@ -54,6 +54,7 @@ skip-if = toolkit == 'android' || toolkit == 'gonk'
 [test_pinning_dynamic.js]
 [test_pinning_header_parsing.js]
 
+[test_cert_dbKey.js]
 [test_cert_keyUsage.js]
 [test_logoutAndTeardown.js]
 run-sequentially = hardcoded ports
diff --git a/testing/mozharness/configs/beetmover/en_us.yml.tmpl b/testing/mozharness/configs/beetmover/en_us.yml.tmpl
new file mode 100644
index 0000000000..7a372d8ae5
--- /dev/null
+++ b/testing/mozharness/configs/beetmover/en_us.yml.tmpl
@@ -0,0 +1,84 @@
+---
+metadata:
+    name: "Beet Mover Manifest"
+    description: "Maps artifact locations to s3 key names for the en-US locale"
+    owner: "release@mozilla.com"
+
+mapping:
+{% for locale in locales %}
+  # common deliverables
+  {{ locale }}:
+    complete_mar:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.complete.mar
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ version }}.complete.mar
+    checksum:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.checksums
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.checksums
+    checksum_sig:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.checksums.asc
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.checksums.asc
+    buildinfo:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.json
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.json
+    mozinfo:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.mozinfo.json
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.mozinfo.json
+    socorroinfo:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.txt
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.txt
+
+  {% if platform == "win32" %}
+    full_installer:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer.exe
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup {{ version }}.exe
+    stub_installer:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer-stub.exe
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup Stub {{ version }}.exe
+    package:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.zip
+    symbols:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip
+  {% endif %}
+
+  {% if platform == "win64" %}
+    full_installer:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer.exe
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup {{ version }}.exe
+    package:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.zip
+    symbols:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip
+  {% endif %}
+
+  {% if platform == "linux-i686" %}
+    package:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.tar.bz2
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.tar.bz2
+    symbols:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip
+  {% endif %}
+
+  {% if platform == "linux-x86_64" %}
+    package:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.tar.bz2
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.tar.bz2
+    symbols:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip
+  {% endif %}
+
+  {% if platform == "mac" %}
+    package:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.dmg
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox {{ version }}.dmg
+    symbols:
+      artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox {{ version }}.crashreporter-symbols.zip
+  {% endif %}
+
+{% endfor %}
diff --git a/testing/mozharness/configs/beetmover/partials.yml.tmpl b/testing/mozharness/configs/beetmover/partials.yml.tmpl
new file mode 100644
index 0000000000..0f4d2c353a
--- /dev/null
+++ b/testing/mozharness/configs/beetmover/partials.yml.tmpl
@@ -0,0 +1,16 @@
+---
+metadata:
+    name: "Beet Mover Manifest"
+    description: "Maps artifact locations to s3 key names for partials"
+    owner: "release@mozilla.com"
+
+mapping:
+{% for locale in locales %}
+  {{ locale }}:
+    partial_mar:
+      artifact: {{ artifact_base_url }}/firefox-{{ partial_version }}-{{ version }}.{{ locale }}.{{ platform }}.partial.mar
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ partial_version }}-{{ version }}.partial.mar
+    partial_mar_sig:
+      artifact: {{ artifact_base_url }}/firefox-{{ partial_version }}-{{ version }}.{{ locale }}.{{ platform }}.partial.mar.asc
+      s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ partial_version }}-{{ version }}.partial.mar.asc
+{% endfor %}
diff --git a/testing/mozharness/external_tools/extract_and_run_command.py b/testing/mozharness/external_tools/extract_and_run_command.py
new file mode 100644
index 0000000000..ab48ee1dff
--- /dev/null
+++ b/testing/mozharness/external_tools/extract_and_run_command.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+"""\
+Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories]
+    -j is the number of workers to start, defaulting to 1.
+    [command to run] must be a command that can accept one or many files
+    to process as arguments.
+
+WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to
+         terminate it early.
+ """
+
+### The canonical location for this file is
+###   https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py
+###
+### Please update the copy in puppet to deploy new changes to
+### stage.mozilla.org, see
+# https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage
+
+import logging
+import os
+from os import path
+import sys
+from Queue import Queue
+import shutil
+import subprocess
+import tempfile
+from threading import Thread
+import time
+
+logging.basicConfig(
+    stream=sys.stdout, level=logging.INFO, format="%(message)s")
+log = logging.getLogger(__name__)
+
+try:
+    # the future - https://github.com/mozilla/build-mar via a venv
+    from mardor.marfile import BZ2MarFile
+except:
+    # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py
+    sys.path.append(
+        path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils"))
+    from mar import BZ2MarFile
+
+SEVENZIP = "7za"
+
+
+def extractMar(filename, tempdir):
+    m = BZ2MarFile(filename)
+    m.extractall(path=tempdir)
+
+
+def extractExe(filename, tempdir):
+    try:
+        # We don't actually care about output, put we redirect to a tempfile
+        # to avoid deadlocking in wait() when stdout=PIPE
+        fd = tempfile.TemporaryFile()
+        proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename],
+                                stdout=fd, stderr=subprocess.STDOUT)
+        proc.wait()
+    except subprocess.CalledProcessError:
+        # Not all EXEs are 7-zip files, so we have to ignore extraction errors
+        pass
+
+# The keys here are matched against the last 3 characters of filenames.
+# The values are callables that accept two string arguments.
+EXTRACTORS = {
+    '.mar': extractMar,
+    '.exe': extractExe,
+}
+
+
+def find_files(d):
+    """yields all of the files in `d'"""
+    for root, dirs, files in os.walk(d):
+        for f in files:
+            yield path.abspath(path.join(root, f))
+
+
+def rchmod(d, mode=0755):
+    """chmods everything in `d' to `mode', including `d' itself"""
+    os.chmod(d, mode)
+    for root, dirs, files in os.walk(d):
+        for item in dirs:
+            os.chmod(path.join(root, item), mode)
+        for item in files:
+            os.chmod(path.join(root, item), mode)
+
+
+def maybe_extract(filename):
+    """If an extractor is found for `filename', extracts it to a temporary
+       directory and chmods it. The consumer is responsible for removing
+       the extracted files, if desired."""
+    ext = path.splitext(filename)[1]
+    if ext not in EXTRACTORS.keys():
+        return None
+    # Append the full filepath to the tempdir
+    tempdir_root = tempfile.mkdtemp()
+    tempdir = path.join(tempdir_root, filename.lstrip('/'))
+    os.makedirs(tempdir)
+    EXTRACTORS[ext](filename, tempdir)
+    rchmod(tempdir_root)
+    return tempdir_root
+
+
+def process(item, command):
+    def format_time(t):
+        return time.strftime("%H:%M:%S", time.localtime(t))
+    # Buffer output to avoid interleaving of multiple workers'
+    logs = []
+    args = [item]
+    proc = None
+    start = time.time()
+    logs.append("START %s: %s" % (format_time(start), item))
+    # If the file was extracted, we need to process all of its files, too.
+    tempdir = maybe_extract(item)
+    if tempdir:
+        for f in find_files(tempdir):
+            args.append(f)
+
+    try:
+        fd = tempfile.TemporaryFile()
+        proc = subprocess.Popen(command + args, stdout=fd)
+        proc.wait()
+        if proc.returncode != 0:
+            raise Exception("returned %s" % proc.returncode)
+    finally:
+        if tempdir:
+            shutil.rmtree(tempdir)
+        fd.seek(0)
+        # rstrip() here to avoid an unnecessary newline, if it exists.
+        logs.append(fd.read().rstrip())
+        end = time.time()
+        elapsed = end - start
+        logs.append("END %s (%d seconds elapsed): %s\n" % (
+            format_time(end), elapsed, item))
+        # Now that we've got all of our output, print it. It's important that
+        # the logging module is used for this, because "print" is not
+        # thread-safe.
+        log.info("\n".join(logs))
+
+
+def worker(command, errors):
+    item = q.get()
+    while item != None:
+        try:
+            process(item, command)
+        except:
+            errors.put(item)
+        item = q.get()
+
+if __name__ == '__main__':
+    # getopt is used in favour of optparse to enable "--" as a separator
+    # between the command and list of files. optparse doesn't allow that.
+    from getopt import getopt
+    options, args = getopt(sys.argv[1:], 'j:h', ['help'])
+
+    concurrency = 1
+    for o, a in options:
+        if o == '-j':
+            concurrency = int(a)
+        elif o in ('-h', '--help'):
+            log.info(__doc__)
+            sys.exit(0)
+
+    if len(args) < 3 or '--' not in args:
+        log.error(__doc__)
+        sys.exit(1)
+
+    command = []
+    while args[0] != "--":
+        command.append(args.pop(0))
+    args.pop(0)
+
+    q = Queue()
+    errors = Queue()
+    threads = []
+    for i in range(concurrency):
+        t = Thread(target=worker, args=(command, errors))
+        t.start()
+        threads.append(t)
+
+    # find_files is a generator, so work will begin prior to it finding
+    # all of the files
+    for arg in args:
+        if path.isfile(arg):
+            q.put(arg)
+        else:
+            for f in find_files(arg):
+                q.put(f)
+    # Because the workers are started before we start populating the q
+    # they can't use .empty() to determine whether or not their done.
+    # We also can't use q.join() or j.task_done(), because we need to
+    # support Python 2.4. We know that find_files won't yield None,
+    # so we can detect doneness by having workers die when they get None
+    # as an item.
+    for i in range(concurrency):
+        q.put(None)
+
+    for t in threads:
+        t.join()
+
+    if not errors.empty():
+        log.error("Command failed for the following files:")
+        while not errors.empty():
+            log.error("  %s" % errors.get())
+        sys.exit(1)
diff --git a/testing/mozharness/scripts/release/beet_mover.py b/testing/mozharness/scripts/release/beet_mover.py
new file mode 100644
index 0000000000..4a0634acd1
--- /dev/null
+++ b/testing/mozharness/scripts/release/beet_mover.py
@@ -0,0 +1,343 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""beet_mover.py.
+
+downloads artifacts, scans them and uploads them to s3
+"""
+import hashlib
+import sys
+import os
+import pprint
+import re
+from os import listdir
+from os.path import isfile, join
+
+sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
+from mozharness.base.log import FATAL
+from mozharness.base.python import VirtualenvMixin
+from mozharness.base.script import BaseScript
+import mozharness
+
+
+def get_hash(content, hash_type="md5"):
+    h = hashlib.new(hash_type)
+    h.update(content)
+    return h.hexdigest()
+
+
+def get_aws_auth():
+    """
+    retrieves aws creds and deletes them from os.environ if present.
+    """
+    aws_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
+    aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
+
+    if aws_key_id and aws_secret_key:
+        del os.environ['AWS_ACCESS_KEY_ID']
+        del os.environ['AWS_SECRET_ACCESS_KEY']
+    else:
+        exit("could not determine aws credentials from os environment")
+
+    return aws_key_id, aws_secret_key
+
+
+CONFIG_OPTIONS = [
+    [["--template"], {
+        "dest": "template",
+        "help": "Specify jinja2 template file",
+    }],
+    [['--locale', ], {
+        "action": "extend",
+        "dest": "locales",
+        "type": "string",
+        "help": "Specify the locale(s) to upload."}],
+    [["--platform"], {
+        "dest": "platform",
+        "help": "Specify the platform of the build",
+    }],
+    [["--version"], {
+        "dest": "version",
+        "help": "full release version based on gecko and tag/stage identifier. e.g. '44.0b1'"
+    }],
+    [["--app-version"], {
+        "dest": "app_version",
+        "help": "numbered version based on gecko. e.g. '44.0'"
+    }],
+    [["--partial-version"], {
+        "dest": "partial_version",
+        "help": "the partial version the mar is based off of"
+    }],
+    [["--artifact-subdir"], {
+        "dest": "artifact_subdir",
+        "default": 'build',
+        "help": "subdir location for taskcluster artifacts after public/ base.",
+    }],
+    [["--build-num"], {
+        "dest": "build_num",
+        "help": "the release build identifier"
+    }],
+    [["--taskid"], {
+        "dest": "taskid",
+        "help": "taskcluster task id to download artifacts from",
+    }],
+    [["--production"], {
+        "dest": "production",
+        "default": False,
+        "help": "taskcluster task id to download artifacts from",
+    }],
+    [["--exclude"], {
+        "dest": "excludes",
+        "action": "append",
+        "help": "List of filename patterns to exclude. See script source for default",
+    }],
+    [["-s", "--scan-parallelization"], {
+        "dest": "scan_parallelization",
+        "default": 4,
+        "type": "int",
+        "help": "Number of concurrent file scans",
+    }],
+]
+
+DEFAULT_EXCLUDES = [
+    r"^.*tests.*$",
+    r"^.*crashreporter.*$",
+    r"^.*\.zip(\.asc)?$",
+    r"^.*\.log$",
+    r"^.*\.txt$",
+    r"^.*\.asc$",
+    r"^.*/partner-repacks.*$",
+    r"^.*.checksums(\.asc)?$",
+    r"^.*/logs/.*$",
+    r"^.*/jsshell.*$",
+    r"^.*json$",
+    r"^.*/host.*$",
+    r"^.*/mar-tools/.*$",
+    r"^.*gecko-unsigned-unaligned.apk$",
+    r"^.*robocop.apk$",
+    r"^.*contrib.*"
+]
+CACHE_DIR = 'cache'
+
+
+class BeetMover(BaseScript, VirtualenvMixin, object):
+    def __init__(self, aws_creds):
+        beetmover_kwargs = {
+            'config_options': CONFIG_OPTIONS,
+            'all_actions': [
+                # 'clobber',
+                'create-virtualenv',
+                'activate-virtualenv',
+                'generate-candidates-manifest',
+                'verify-bits',  # beets
+                'download-bits', # beets
+                'scan-bits',     # beets
+                'upload-bits',  # beets
+            ],
+            'require_config_file': False,
+            # Default configuration
+            'config': {
+                # base index url where to find taskcluster artifact based on taskid
+                # TODO - find out if we need to support taskcluster run number other than 0.
+                # e.g. maybe we could end up with artifacts in > 'run 0' in a re-trigger situation?
+                "artifact_base_url": 'https://queue.taskcluster.net/v1/task/{taskid}/runs/0/artifacts/public/{subdir}',
+                "virtualenv_modules": [
+                    "boto",
+                    "PyYAML",
+                    "Jinja2",
+                    "redo",
+                    "mar",
+                ],
+                "virtualenv_path": "venv",
+                'buckets': {
+                    'development': "mozilla-releng-beet-mover-dev",
+                    'production': "mozilla-releng-beet-mover",
+                },
+                'product': 'firefox',
+            },
+        }
+        #todo do excludes need to be configured via command line for specific builds?
+        super(BeetMover, self).__init__(**beetmover_kwargs)
+
+        c = self.config
+        self.manifest = {}
+        # assigned in _post_create_virtualenv
+        self.virtualenv_imports = None
+        self.bucket = c['buckets']['production'] if c['production'] else c['buckets']['development']
+        self.aws_key_id, self.aws_secret_key = aws_creds
+        # if excludes is set from command line, use it otherwise use defaults
+        self.excludes = self.config.get('excludes', DEFAULT_EXCLUDES)
+        dirs = self.query_abs_dirs()
+        self.dest_dir = os.path.join(dirs['abs_work_dir'], CACHE_DIR)
+
+    def activate_virtualenv(self):
+        """
+        activates virtualenv and adds module imports to a instance wide namespace.
+
+        creating and activating a virtualenv onto the currently executing python interpreter is a
+        bit black magic. Rather than having import statements added in various places within the
+        script, we import them here immediately after we activate the newly created virtualenv
+        """
+        VirtualenvMixin.activate_virtualenv(self)
+
+        import boto
+        import yaml
+        import jinja2
+        self.virtualenv_imports = {
+            'boto': boto,
+            'yaml': yaml,
+            'jinja2': jinja2,
+        }
+        self.log("activated virtualenv with the modules: {}".format(str(self.virtualenv_imports)))
+
+    def generate_candidates_manifest(self):
+        """
+        generates and outputs a manifest that maps expected Taskcluster artifact names
+        to release deliverable names
+        """
+        self.log('generating manifest from {}...'.format(self.config['template']))
+        template_dir, template_file = os.path.split(os.path.abspath(self.config['template']))
+        jinja2 = self.virtualenv_imports['jinja2']
+        yaml = self.virtualenv_imports['yaml']
+
+        jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir),
+                                       undefined=jinja2.StrictUndefined)
+        template = jinja_env.get_template(template_file)
+        template_vars = {
+            "platform": self.config['platform'],
+            "locales": self.config['locales'],
+            "version": self.config['version'],
+            "app_version": self.config.get('app_version', ''),
+            "partial_version": self.config.get('partial_version', ''),
+            "build_num": self.config['build_num'],
+            # mirror current release folder structure
+            "s3_prefix": 'pub/{}/candidates'.format(self.config['product']),
+            "artifact_base_url": self.config['artifact_base_url'].format(
+                    taskid=self.config['taskid'], subdir=self.config['artifact_subdir']
+            )
+        }
+        self.manifest = yaml.safe_load(template.render(**template_vars))
+
+        self.log("manifest generated:")
+        self.log(pprint.pformat(self.manifest['mapping']))
+
+    def verify_bits(self):
+        """
+        inspects each artifact and verifies that they were created by trustworthy tasks
+        """
+        # TODO
+        self.log('skipping verification. unimplemented...')
+
+    def download_bits(self):
+        """
+        downloads list of artifacts to self.dest_dir dir based on a given manifest
+        """
+        self.log('downloading and uploading artifacts to self_dest_dir...')
+
+        # TODO - do we want to mirror/upload to more than one region?
+        dirs = self.query_abs_dirs()
+
+        for locale in self.manifest['mapping']:
+            for deliverable in self.manifest['mapping'][locale]:
+                self.log("downloading '{}' deliverable for '{}' locale".format(deliverable, locale))
+                # download locally to working dir
+                source=self.manifest['mapping'][locale][deliverable]['artifact']
+                file_name = self.retry(self.download_file,
+                    args=[source],
+                    kwargs={'parent_dir': dirs['abs_work_dir']},
+                    error_level=FATAL)
+        self.log('Success!')
+
+    def upload_bits(self):
+        """
+        uploads list of artifacts to s3 candidates dir based on a given manifest
+        """
+        self.log('uploading artifacts to s3...')
+        dirs = self.query_abs_dirs()
+
+        # connect to s3
+        boto = self.virtualenv_imports['boto']
+        conn = boto.connect_s3(self.aws_key_id, self.aws_secret_key)
+        bucket = conn.get_bucket(self.bucket)
+
+        #todo change so this is not every entry in manifest - should exclude those that don't pass virus sign
+        #not sure how to determine this
+        for locale in self.manifest['mapping']:
+            for deliverable in self.manifest['mapping'][locale]:
+                self.log("uploading '{}' deliverable for '{}' locale".format(deliverable, locale))
+                #we have already downloaded the files locally so we can use that version
+                source = self.manifest['mapping'][locale][deliverable]['artifact']
+                downloaded_file = os.path.join(dirs['abs_work_dir'], self.get_filename_from_url(source))
+                self.upload_bit(
+                    source=downloaded_file,
+                    s3_key=self.manifest['mapping'][locale][deliverable]['s3_key'],
+                    bucket=bucket,
+                )
+        self.log('Success!')
+
+
+    def upload_bit(self, source, s3_key, bucket):
+        # TODO - do we want to mirror/upload to more than one region?
+        dirs = self.query_abs_dirs()
+        boto = self.virtualenv_imports['boto']
+
+        #todo need to copy from dir to s3
+
+        self.info('uploading to s3 with key: {}'.format(s3_key))
+        key = boto.s3.key.Key(bucket)  # create new key
+        key.key = s3_key  # set key name
+
+        self.info("Checking if `{}` already exists".format(s3_key))
+        key = bucket.get_key(s3_key)
+        if not key:
+            self.info("Uploading to `{}`".format(s3_key))
+            key = bucket.new_key(s3_key)
+
+            # set key value
+            self.retry(key.set_contents_from_filename, args=[source], error_level=FATAL),
+
+            # key.make_public() may lead to race conditions, because
+            # it doesn't pass version_id, so it may not set permissions
+            bucket.set_canned_acl(acl_str='public-read', key_name=s3_key,
+                                  version_id=key.version_id)
+        else:
+            if not get_hash(key.get_contents_as_string()) == get_hash(open(source).read()):
+                # for now, let's halt. If necessary, we can revisit this and allow for overwrites
+                #  to the same buildnum release with different bits
+                self.fatal("`{}` already exists with different checksum.".format(s3_key))
+            self.log("`{}` has the same MD5 checksum, not uploading".format(s3_key))
+
+    def scan_bits(self):
+
+        dirs = self.query_abs_dirs()
+
+        filenames = [f for f in listdir(dirs['abs_work_dir']) if isfile(join(dirs['abs_work_dir'], f))]
+        self.mkdir_p(self.dest_dir)
+        for file_name in filenames:
+            if self._matches_exclude(file_name):
+                self.info("Excluding {} from virus scan".format(file_name))
+            else:
+                self.info('Copying {} to {}'.format(file_name,self.dest_dir))
+                self.copyfile(os.path.join(dirs['abs_work_dir'], file_name), os.path.join(self.dest_dir,file_name))
+        self._scan_files()
+        self.info('Emptying {}'.format(self.dest_dir))
+        self.rmtree(self.dest_dir)
+
+    def _scan_files(self):
+        """Scan the files we've collected. We do the download and scan concurrently to make
+        it easier to have a coherent log afterwards. Uses the venv python."""
+        external_tools_path = os.path.join(
+                              os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), 'external_tools')
+        self.run_command([self.query_python_path(), os.path.join(external_tools_path,'extract_and_run_command.py'),
+                         '-j{}'.format(self.config['scan_parallelization']),
+                         'clamscan', '--no-summary', '--', self.dest_dir])
+
+    def _matches_exclude(self, keyname):
+         return any(re.search(exclude, keyname) for exclude in self.excludes)
+
+if __name__ == '__main__':
+    beet_mover = BeetMover(get_aws_auth())
+    beet_mover.run_and_exit()
diff --git a/xpcom/threads/LazyIdleThread.h b/xpcom/threads/LazyIdleThread.h
index 44ca824d0a..77b38ce8b3 100644
--- a/xpcom/threads/LazyIdleThread.h
+++ b/xpcom/threads/LazyIdleThread.h
@@ -45,10 +45,7 @@ public:
   NS_DECL_NSITIMERCALLBACK
   NS_DECL_NSITHREADOBSERVER
   NS_DECL_NSIOBSERVER
-  // missing from NS_DECL_NSIEVENTTARGET because MSVC
-  nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-    return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-  }
+  using nsIEventTarget::Dispatch;
 
   enum ShutdownMethod
   {
diff --git a/xpcom/threads/SharedThreadPool.h b/xpcom/threads/SharedThreadPool.h
index 8212647991..8a90dbcd38 100644
--- a/xpcom/threads/SharedThreadPool.h
+++ b/xpcom/threads/SharedThreadPool.h
@@ -54,10 +54,6 @@ public:
   // Forward behaviour to wrapped thread pool implementation.
   NS_FORWARD_SAFE_NSITHREADPOOL(mPool);
 
-  // See bug 1155059 - MSVC forces us to not declare Dispatch normally in idl
-  //  NS_FORWARD_SAFE_NSIEVENTTARGET(mEventTarget);
-  nsresult Dispatch(nsIRunnable *event, uint32_t flags) { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->Dispatch(event, flags); }
- 
   NS_IMETHOD DispatchFromScript(nsIRunnable *event, uint32_t flags) override {
       return Dispatch(event, flags);
   }
@@ -65,6 +61,8 @@ public:
   NS_IMETHOD Dispatch(already_AddRefed<nsIRunnable>&& event, uint32_t flags) override
     { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->Dispatch(Move(event), flags); }
 
+  using nsIEventTarget::Dispatch;
+
   NS_IMETHOD IsOnCurrentThread(bool *_retval) override { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->IsOnCurrentThread(_retval); }
 
   // Creates necessary statics. Called once at startup.
diff --git a/xpcom/threads/nsIEventTarget.idl b/xpcom/threads/nsIEventTarget.idl
index bfff6d171f..c3e4d3c329 100644
--- a/xpcom/threads/nsIEventTarget.idl
+++ b/xpcom/threads/nsIEventTarget.idl
@@ -13,15 +13,10 @@
 
 native alreadyAddRefed_nsIRunnable(already_AddRefed<nsIRunnable>&&);
 
-[scriptable, uuid(f9d60700-e6dc-4a72-9537-689058655472)]
+[scriptable, uuid(88145945-3278-424e-9f37-d874cbdd9f6f)]
 interface nsIEventTarget : nsISupports
 {
   /* until we can get rid of all uses, keep the non-alreadyAddRefed<> version */
-  /**
-   * This must be non-virtual due to issues with MSVC 2013's ordering of
-   * vtbls for overloads.  With other platforms we can leave this virtual
-   * and avoid adding lots of Dispatch() methods to classes inheriting this.
-   */
 %{C++
     nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
       return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
diff --git a/xpcom/threads/nsIThread.idl b/xpcom/threads/nsIThread.idl
index e9bd22c59c..36fd9af152 100644
--- a/xpcom/threads/nsIThread.idl
+++ b/xpcom/threads/nsIThread.idl
@@ -17,7 +17,7 @@
  *
  * See nsIThreadManager for the API used to create and locate threads.
  */
-[scriptable, uuid(594feb13-6164-4054-b5a1-ad62e10ea15d)]
+[scriptable, uuid(5801d193-29d1-4964-a6b7-70eb697ddf2b)]
 interface nsIThread : nsIEventTarget
 {
   /**
diff --git a/xpcom/threads/nsIThreadInternal.idl b/xpcom/threads/nsIThreadInternal.idl
index 9287bf5d79..e5d7cc83ff 100644
--- a/xpcom/threads/nsIThreadInternal.idl
+++ b/xpcom/threads/nsIThreadInternal.idl
@@ -13,7 +13,7 @@ interface nsIThreadObserver;
  * The XPCOM thread object implements this interface, which allows a consumer
  * to observe dispatch activity on the thread.
  */
-[scriptable, uuid(9cc51754-2eb3-4b46-ae99-38a61881c622)]
+[scriptable, uuid(a3a72e5f-71d9-4add-8f30-59a78fb6d5eb)]
 interface nsIThreadInternal : nsIThread
 {
   /**
diff --git a/xpcom/threads/nsIThreadPool.idl b/xpcom/threads/nsIThreadPool.idl
index 77a124a85e..d04e8504a6 100644
--- a/xpcom/threads/nsIThreadPool.idl
+++ b/xpcom/threads/nsIThreadPool.idl
@@ -27,7 +27,7 @@ interface nsIThreadPoolListener : nsISupports
  * anonymous (unnamed) worker threads.  An event dispatched to the thread pool
  * will be run on the next available worker thread.
  */
-[scriptable, uuid(cacd4a2e-2655-4ff8-894c-10c15883cd0a)]
+[scriptable, uuid(76ce99c9-8e43-489a-9789-f27cc4424965)]
 interface nsIThreadPool : nsIEventTarget
 {
   /**
diff --git a/xpcom/threads/nsThread.h b/xpcom/threads/nsThread.h
index b2ba05b360..e1abae918c 100644
--- a/xpcom/threads/nsThread.h
+++ b/xpcom/threads/nsThread.h
@@ -33,10 +33,7 @@ public:
   NS_DECL_NSITHREAD
   NS_DECL_NSITHREADINTERNAL
   NS_DECL_NSISUPPORTSPRIORITY
-  // missing from NS_DECL_NSIEVENTTARGET because MSVC
-  nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-    return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-  }
+  using nsIEventTarget::Dispatch;
 
   enum MainThreadFlag
   {
diff --git a/xpcom/threads/nsThreadPool.h b/xpcom/threads/nsThreadPool.h
index b32ad191a8..9552738b36 100644
--- a/xpcom/threads/nsThreadPool.h
+++ b/xpcom/threads/nsThreadPool.h
@@ -28,10 +28,7 @@ public:
   NS_DECL_NSIEVENTTARGET
   NS_DECL_NSITHREADPOOL
   NS_DECL_NSIRUNNABLE
-  // missing from NS_DECL_NSIEVENTTARGET because MSVC
-  nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) {
-    return Dispatch(nsCOMPtr<nsIRunnable>(aEvent).forget(), aFlags);
-  }
+  using nsIEventTarget::Dispatch;
 
   nsThreadPool();