From dcf417821219b618a6ce2f17dd5b198fa5d0b369 Mon Sep 17 00:00:00 2001 From: roytam1 Date: Wed, 22 Nov 2023 17:12:24 +0800 Subject: [PATCH] import changes from `dev' branch of rmottola/Arctic-Fox: - Bug 1218706 - Make 'unicode-bidi: isolate' the default for elements with a dir attribute. r=dbaron,jfkthame (037a5aaf57) - Bug 743198 part 7 - Add :fullscreen pseudo class. r=heycam (05eb82ccc9) - missing bits of Bug 1242904 - Update Brotli t (f4f569d859) - Bug 1235298 - Annotate intentional switch fallthroughs to suppress -Wimplicit-fallthrough warnings in netwerk/. r=mcmanus (c4ae414769) - Bug 1244841 - Don't measure SharedArrayBuffer objects multiple times. r=lth. (e9b665dc82) - Bug 234031 - broken parsing of xlen in gzip rfc 1952 r=mcmanus (c905b50417) - Bug 1239605 - filter insertions into the global object. r=jandem (35520bc5c8) - Bug 1248681 - Warn if the result of putWrapper goes unused; r=sfink (8051888ef8) - Bug 1246293 - Fix a typo in DefineSelfHostedProperty; r=till (5e05a76b2a) - Bug 1243808 - Allow modules to be compiled off main thread r=shu (51e5adca0a) - Bug 1241767 - avoid default-only switch to placate MSVC. r=bbouvier (392aa8eda0) - Bug 1239710 - Use CountingSort for Uint8Array and Int8Array; r=mrrrgn (6f394ab442) - Bug 1246860 - Preserve holes when sorting arrays with a custom comparator. r=till (725b091e55) - Bug 1247283 - Improve self-hosted Array.sort performance; r=till (ab1f3ddb74) - Bug 1248717 - Don't initialize variables in a for head with var, then use them later. r=jorendorff (87a77f9623) - Bug 1242196 - Use RadixSort for typed arrays. r=mrrrgn (a280ea097d) - Bug 1246927 - Fix OOM handling in usages of GlobalObject::maybeGetIntrinsicValue. r=arai (f7f3761c45) - Bug 1248405 - Recover from JSObject::getGroup OOM in CanOptimizeForDenseStorage. r=jandem (68ea32c044) - Bug 1225905 - create a mozharness script that manages each beet mover task logic, NPOTB DONTBUILD r=rail (076f25536f) - Bug 1246074 - add partials template config for mozharness beetmover, DONTBUILD r=rail (2e42c78d92) - Bug 1246074 - add partials mozharness beetmover, custom tc artifact location, DONTBUILD NPOTB r=rail (c74fe0755a) - Bug 1210538 - Add antivirus checks to release promotion graph a=rail (56de774389) - Bug 1238610 - Restore compartment debug mode flags if adding a debuggee global fails r=jimb (f8a9a1fa85) - Bug 1243851 - Treat enter as shift+enter if input is valid but incomplete; r=fitzgen, bgrins (f27c959bc0) - Bug 1238610 - Fix implicit constructor errors r=me (6f26f92763) - Bug 1242111 - Handle OOM in UpdateExecutionObservabilityOfScriptsInZone. (r=jimb) (91b125725f) - Bug 1240503 - Skip the initial block scope when unwinding scopes due to an exception that's thrown in the prologue before the scope chain is properly initialized for a script that starts with a block scope. (r=jorendorff) (0247fc8848) - Bug 1242111 - Fix references to oomTest. (r=me) (a9dc13a648) - Bug 1240546 - Handle OOM in updateObservesAllExecutionOnDebuggees. (r=jimb) (5d7e3a49f4) - Bug 1245961 - Throw a TypeError if less than one argument is supplied to isCompilableUnit;r=fitzgen (f896abb042) - Bug 1240803 - Handle OOM in replaceFrameGuts. (r=jimb) (2d43384c72) - Bug 1248094 - Simplify PCLocationMap with GCHashMap; r=fitzgen (b28d983bbd) - we don't care much abut 68k sadly (51c50300c5) - Bug 1233786 - JSScript::initScriptCounts should report OOMs. r=bhackett (eb42f7b8c1) - Bug 1233178 - Move ScriptCounts allocation outside the HashMap. r=bhackett (c3fa6d487c) - Bug 1141579 - Synchronize access to warmUpCount; r=jandem (a5b72cdf94) - Bug 1203696: Improve comments about lazily-initialized member of js::LazyScript. r=shu (caa895612d) - Bug 1221992 - Fix test using GetMostRecentWindow from the child process. r=smaug (07affe8195) - Bug 1235636 - rewrite PCToLineNumber; r=fitzgen (9dc9ff013e) - Bug 1232100 - "Check charsWritten in non-debug builds.". r=jcoppeard (fc5a64e621) - last bit of Bug 1197932 (86277af34e) - Bug 1067049 - Implement arguments[@@iterator]. r=evilpie (543e513269) - Bug 1248930 - Use Int32Value in ArrayBufferObject::BYTE_LENGTH_SLOT. r=lth (71e3a9ee51) - Bug 1113685 - Report the right name when calling selfhosted functions on incompatible objects. r=till (51f68d4f8d) - some symbol cleanup (83fca10034) - Bug 1165011 - Remove Symbol_isRegExp. r=jorendorff (46a2d293cc) - Bug 1122900: Make libyuv compile with MSVC 2015, r=rjesup. (9e147c7ba7) - bug 1241453 - clean up GetAccessibleWrap() r=davidb (01e37c5012) - Bug 1243331 - Prevent G_DEFINE_TYPE_EXTENDED macro from producing a fatal warning, r=tbsaunde (8bf031c4b9) - Bug 1232527 - Call into WMF PDM to determine if WMF can decode instead of using GMPVideoDecoderTrialCreator. r=jwwang (7d2b1f16f1) - Bug 1229475 - Fix gen-sources for libopus 1.1.1. r=cpearce (1e5a768d94) - Bug 1229475 - Update libopus to 1.1.1 release. r=jmspeex (0b73488ab3) - Bug 1139087 - Add moz.build bugzilla metadata for codecs. r=kinetik,gps (3d906f8f5a) - Bug 1229475 - libopus: Patch out asm flags for run_analysis. r=jmspeex (bfa15edac1) - Bug 1229475 - Fix unified build. r=cpearce (bbeda94cfc) - Bug 1239078 - Update libopus to 1.1.2 release. r=kinetik (9990b00867) - Bug 1239078 - Bump libopus update script for 1.1.2. r=kinetik (eecd46d3d3) - bug 1230377 - part 1/2: ensure nsKeyObject releases NSS resources on shutdown r=jcj (9ceefecbea) - bug 1230377 - part 2/2: simplify nsIKeyObject and nsIKeyObjectFactory r=jcj (1297d168b7) - bug 1239609 - audit nsNSSShutDownObject destructors for correctness r=Cykesiopka,sworkman (c78404e52a) - Bug 1246263 - fix unified build pollution r=valentin (f8db2c45cf) - Bug 1214981 - Disable output stream buffering. r=keeler (d9e7a1b863) - bug 1240173 - improve nsIX509Cert.dbKey r=Cykesiopka (0c0fc8e8a3) - Bug 1238042 - Extract a helper function to check if a JSObject is a global with a particular about: URI. r=ehsan (d065854725) - Bug 1244118 - Shutdown threadpool when xpcom-shutdown-threads happened. r=roc (e6ef2768b6) - Bug 1201685 - Limit the number of indexedDB open() calls in IndexedDBHelper r=gwagner (a4fc80fca2) - Bug 1244049 - Part 3: Replace the type of nsCSSSelector::mPseudoType. r=dbaron (c817ee6145) - Bug 1244049 - Part 4: Define CSSPseudoElementTypeBase. r=dbaron (94dab59375) - Bug 1246846 (part 1) - Avoid nsTHashtable::RawRemoveEntry() in dom/. r=bz. (5371e478da) - Bug 1246846 (part 2) - Avoid nsTHashtable::RawRemoveEntry() in nsPermissionManager. r=mconnor. (d7a1143ed1) - Bug 1246846 (part 3) - Avoid nsTHashtable::RawRemoveEntry() in gfxFontconfigUtils. r=jfkthame. (d23259ca8e) - Bug 1246846 (part 4) - Avoid nsTHashtable::RawRemoveEntry() in FramePropertyTable. r=roc. (7de416abfa) - Bug 1238404 - Use 'using' directive instead of having separate Dispatch impl in subclasses of nsIEventTarget. r=froydnj (43028ed3b3) - Bug 938699 - Remove FindElementWithViewId from nsIDOMWindowUtils.idl and nsDOMWindowUtils.cpp. r=kats (b49d2b5e6a) - missing bit of Bug 1210294 - Remove the release-mode IsCallerChrome assertions (a555243280) --- accessible/atk/AccessibleWrap.cpp | 14 +- accessible/atk/moz.build | 7 +- dom/animation/PseudoElementHashEntry.h | 2 + dom/base/FeedWriterEnabled.h | 33 +- dom/base/ImageEncoder.cpp | 38 +- dom/base/ImageEncoder.h | 1 + dom/base/IndexedDBHelper.jsm | 42 +- dom/base/ShadowRoot.cpp | 2 +- dom/base/WebSocket.cpp | 5 +- dom/base/nsContentUtils.cpp | 34 + dom/base/nsContentUtils.h | 8 + dom/base/nsDOMWindowUtils.cpp | 12 - dom/base/nsDocument.cpp | 2 +- ..._exception_options_from_jsimplemented.html | 4 +- ...promise_rejections_from_jsimplemented.html | 4 +- dom/interfaces/base/nsIDOMWindowUtils.idl | 7 - dom/media/platforms/wmf/WMFDecoderModule.cpp | 16 +- dom/media/platforms/wmf/WMFDecoderModule.h | 9 + dom/wifi/WifiCertService.cpp | 6 + dom/xul/XULDocument.cpp | 2 +- extensions/cookie/nsPermissionManager.cpp | 4 +- gfx/thebes/gfxFontconfigUtils.cpp | 8 +- ipc/chromium/src/build/build_config.h | 5 +- js/public/MemoryMetrics.h | 5 +- js/src/builtin/ModuleObject.cpp | 68 +- js/src/builtin/ModuleObject.h | 15 +- js/src/builtin/Sorting.js | 209 ++++- js/src/builtin/TypedArray.js | 17 +- js/src/doc/Debugger/Debugger.md | 10 + js/src/frontend/BytecodeCompiler.cpp | 24 +- js/src/frontend/BytecodeCompiler.h | 6 +- js/src/gc/Zone.h | 11 +- .../tests/collections/constructor-errors.js | 5 +- .../tests/debug/Debugger-isCompilableUnit.js | 58 ++ .../Script-getOffsetsCoverage-bug1233178.js | 13 + js/src/jit-test/tests/debug/bug-1238610.js | 27 + js/src/jit-test/tests/debug/bug1240546.js | 12 + js/src/jit-test/tests/debug/bug1240803.js | 24 + js/src/jit-test/tests/debug/bug1242111.js | 11 + js/src/jit-test/tests/for-of/non-iterable.js | 3 - js/src/jit-test/tests/gc/bug-1240503.js | 8 + js/src/jit-test/tests/ion/bug832058.js | 3 +- js/src/jit-test/tests/modules/global-scope.js | 32 + .../tests/modules/off-thread-compile.js | 16 + .../method-called-on-incompatible.js | 9 + js/src/jsapi.cpp | 32 +- js/src/jsarray.cpp | 6 +- js/src/jscompartment.cpp | 5 +- js/src/jscompartment.h | 23 +- js/src/jsobj.cpp | 2 +- js/src/jsprf.cpp | 2 +- js/src/jsscript.cpp | 77 +- js/src/jsscript.h | 17 +- js/src/jsversion.h | 5 +- js/src/shell/js.cpp | 116 ++- .../tests/ecma_6/Array/iterator_edge_cases.js | 2 +- js/src/tests/ecma_6/Array/sort_holes.js | 72 ++ .../ecma_6/Function/arguments-iterator.js | 167 ++++ .../ecma_6/String/iterator_edge_cases.js | 2 +- js/src/vm/ArgumentsObject.cpp | 43 + js/src/vm/ArgumentsObject.h | 20 +- js/src/vm/ArrayBufferObject.cpp | 13 +- js/src/vm/ArrayBufferObject.h | 4 +- js/src/vm/CallNonGenericMethod.cpp | 4 + js/src/vm/CommonPropertyNames.h | 10 +- js/src/vm/Debugger.cpp | 169 +++- js/src/vm/Debugger.h | 2 + js/src/vm/GlobalObject.cpp | 42 +- js/src/vm/GlobalObject.h | 44 +- js/src/vm/HelperThreads.cpp | 257 ++++-- js/src/vm/HelperThreads.h | 44 +- js/src/vm/NativeObject.cpp | 5 + js/src/vm/SavedStacks.cpp | 22 +- js/src/vm/SavedStacks.h | 35 +- js/src/vm/ScopeObject.cpp | 13 + js/src/vm/SelfHosting.cpp | 93 ++ js/src/vm/SelfHosting.h | 10 +- js/src/vm/SharedArrayObject.cpp | 16 +- js/src/vm/SharedArrayObject.h | 6 +- js/src/vm/Stack.cpp | 1 + js/xpconnect/src/XPCJSRuntime.cpp | 24 +- layout/base/FramePropertyTable.cpp | 8 +- layout/base/FramePropertyTable.h | 3 + layout/inspector/inDOMUtils.cpp | 5 +- layout/media/symbols.def.in | 1 + layout/reftests/bidi/613157-2-ref.html | 2 +- layout/reftests/bidi/698706-1-ref.html | 2 +- layout/reftests/bidi/83958-2c.html | 33 - layout/reftests/bidi/reftest.list | 3 +- layout/style/StyleRule.cpp | 6 +- layout/style/StyleRule.h | 12 +- layout/style/html.css | 113 +-- layout/style/nsCSSPseudoClassList.h | 1 + layout/style/nsCSSPseudoElements.cpp | 9 +- layout/style/nsCSSPseudoElements.h | 3 +- layout/style/nsCSSRuleProcessor.cpp | 13 +- layout/style/nsStyleContext.cpp | 3 +- layout/style/test/test_default_bidi_css.html | 12 +- layout/style/ua.css | 4 +- .../0.1/ClearKeySessionManager.cpp | 23 +- media/libogg/moz.build | 3 + media/libopus/README_MOZILLA | 2 +- media/libopus/celt/_kiss_fft_guts.h | 7 +- media/libopus/celt/arch.h | 33 +- media/libopus/celt/arm/arm2gnu.pl | 53 +- media/libopus/celt/arm/arm_celt_map.c | 82 +- media/libopus/celt/arm/armcpu.c | 6 +- media/libopus/celt/arm/celt_ne10_fft.c | 174 ++++ media/libopus/celt/arm/celt_ne10_mdct.c | 258 ++++++ media/libopus/celt/arm/celt_neon_intr.c | 252 +++++ media/libopus/celt/arm/celt_pitch_xcorr_arm.s | 24 +- media/libopus/celt/arm/fft_arm.h | 72 ++ media/libopus/celt/arm/fixed_armv4.h | 4 + media/libopus/celt/arm/fixed_armv5e.h | 35 + media/libopus/celt/arm/mdct_arm.h | 60 ++ media/libopus/celt/arm/pitch_arm.h | 15 +- media/libopus/celt/bands.c | 235 ++--- media/libopus/celt/bands.h | 24 +- media/libopus/celt/celt.c | 96 +- media/libopus/celt/celt.h | 23 +- media/libopus/celt/celt_decoder.c | 363 ++++---- media/libopus/celt/celt_encoder.c | 386 ++++---- media/libopus/celt/celt_lpc.c | 16 +- media/libopus/celt/celt_lpc.h | 19 +- media/libopus/celt/cpu_support.h | 20 +- media/libopus/celt/cwrs.c | 40 +- media/libopus/celt/cwrs.h | 2 +- media/libopus/celt/entcode.c | 60 ++ media/libopus/celt/entcode.h | 35 + media/libopus/celt/entdec.c | 2 +- media/libopus/celt/entenc.c | 4 +- media/libopus/celt/fixed_debug.h | 11 + media/libopus/celt/fixed_generic.h | 19 +- media/libopus/celt/float_cast.h | 4 +- media/libopus/celt/kiss_fft.c | 497 ++++------ media/libopus/celt/kiss_fft.h | 75 +- media/libopus/celt/mdct.c | 210 +++-- media/libopus/celt/mdct.h | 56 +- media/libopus/celt/mips/celt_mipsr1.h | 151 +++ .../libopus/celt/mips/fixed_generic_mipsr1.h | 126 +++ media/libopus/celt/mips/kiss_fft_mipsr1.h | 167 ++++ media/libopus/celt/mips/mdct_mipsr1.h | 288 ++++++ media/libopus/celt/mips/pitch_mipsr1.h | 161 ++++ media/libopus/celt/mips/vq_mipsr1.h | 125 +++ media/libopus/celt/modes.c | 8 +- media/libopus/celt/modes.h | 8 - media/libopus/celt/os_support.h | 6 +- media/libopus/celt/pitch.c | 77 +- media/libopus/celt/pitch.h | 67 +- media/libopus/celt/quant_bands.c | 2 +- media/libopus/celt/rate.c | 9 +- media/libopus/celt/rate.h | 2 +- media/libopus/celt/stack_alloc.h | 8 +- media/libopus/celt/static_modes_fixed.h | 621 +++++++++---- .../celt/static_modes_fixed_arm_ne10.h | 388 ++++++++ media/libopus/celt/static_modes_float.h | 613 +++++++++---- .../celt/static_modes_float_arm_ne10.h | 404 +++++++++ media/libopus/celt/vq.c | 55 +- media/libopus/celt/vq.h | 9 +- media/libopus/celt/x86/celt_lpc_sse.c | 132 +++ media/libopus/celt/x86/celt_lpc_sse.h | 68 ++ media/libopus/celt/x86/pitch_sse.c | 185 ++++ media/libopus/celt/x86/pitch_sse.h | 260 +++--- media/libopus/celt/x86/pitch_sse2.c | 95 ++ media/libopus/celt/x86/pitch_sse4_1.c | 195 ++++ media/libopus/celt/x86/x86_celt_map.c | 155 ++++ media/libopus/celt/x86/x86cpu.c | 157 ++++ media/libopus/celt/x86/x86cpu.h | 93 ++ media/libopus/gen-sources.py | 2 +- media/libopus/include/opus.h | 5 +- media/libopus/include/opus_defines.h | 97 +- media/libopus/include/opus_multistream.h | 6 +- media/libopus/moz.build | 4 +- media/libopus/nonunified.patch | 38 + media/libopus/silk/A2NLSF.c | 19 +- media/libopus/silk/API.h | 3 +- media/libopus/silk/CNG.c | 21 +- media/libopus/silk/LPC_analysis_filter.c | 6 +- media/libopus/silk/NLSF_del_dec_quant.c | 40 +- media/libopus/silk/NSQ.c | 13 +- media/libopus/silk/NSQ_del_dec.c | 9 +- media/libopus/silk/PLC.c | 66 +- media/libopus/silk/PLC.h | 3 +- media/libopus/silk/SigProc_FIX.h | 29 +- media/libopus/silk/VAD.c | 9 +- media/libopus/silk/VQ_WMat_EC.c | 10 +- media/libopus/silk/code_signs.c | 4 +- media/libopus/silk/control_SNR.c | 5 - media/libopus/silk/control_codec.c | 10 +- media/libopus/silk/dec_API.c | 42 +- media/libopus/silk/decode_core.c | 5 +- media/libopus/silk/decode_frame.c | 25 +- media/libopus/silk/decode_pulses.c | 10 +- media/libopus/silk/define.h | 2 +- media/libopus/silk/enc_API.c | 17 +- media/libopus/silk/encode_pulses.c | 6 +- .../silk/fixed/LTP_analysis_filter_FIX.c | 19 +- media/libopus/silk/fixed/burg_modified_FIX.c | 36 +- media/libopus/silk/fixed/corrMatrix_FIX.c | 10 +- media/libopus/silk/fixed/encode_frame_FIX.c | 14 +- media/libopus/silk/fixed/find_LPC_FIX.c | 2 +- media/libopus/silk/fixed/find_LTP_FIX.c | 7 +- .../libopus/silk/fixed/find_pitch_lags_FIX.c | 2 +- .../libopus/silk/fixed/find_pred_coefs_FIX.c | 15 +- media/libopus/silk/fixed/main_FIX.h | 23 +- .../mips/noise_shape_analysis_FIX_mipsr1.h | 336 +++++++ .../silk/fixed/mips/prefilter_FIX_mipsr1.h | 184 ++++ .../mips/warped_autocorrelation_FIX_mipsr1.h | 165 ++++ .../silk/fixed/noise_shape_analysis_FIX.c | 6 + .../silk/fixed/pitch_analysis_core_FIX.c | 22 +- media/libopus/silk/fixed/prefilter_FIX.c | 16 +- .../libopus/silk/fixed/residual_energy_FIX.c | 5 +- media/libopus/silk/fixed/structs_FIX.h | 1 + media/libopus/silk/fixed/vector_ops_FIX.c | 10 +- .../silk/fixed/warped_autocorrelation_FIX.c | 7 + .../silk/fixed/x86/burg_modified_FIX_sse.c | 375 ++++++++ .../silk/fixed/x86/prefilter_FIX_sse.c | 160 ++++ .../silk/fixed/x86/vector_ops_FIX_sse.c | 88 ++ media/libopus/silk/float/encode_frame_FLP.c | 2 +- media/libopus/silk/float/find_LPC_FLP.c | 2 +- .../libopus/silk/float/find_pred_coefs_FLP.c | 7 +- media/libopus/silk/float/main_FLP.h | 3 +- .../silk/float/pitch_analysis_core_FLP.c | 4 +- media/libopus/silk/float/structs_FLP.h | 1 + media/libopus/silk/float/wrappers_FLP.c | 9 +- media/libopus/silk/log2lin.c | 6 +- media/libopus/silk/macros.h | 40 +- media/libopus/silk/main.h | 55 +- media/libopus/silk/mips/NSQ_del_dec_mipsr1.h | 405 +++++++++ media/libopus/silk/mips/macros_mipsr1.h | 92 ++ media/libopus/silk/mips/sigproc_fix_mipsr1.h | 65 ++ media/libopus/silk/quant_LTP_gains.c | 25 +- media/libopus/silk/resampler_rom.c | 54 +- media/libopus/silk/shell_coder.c | 8 +- media/libopus/silk/structs.h | 2 +- media/libopus/silk/sum_sqr_shift.c | 1 + media/libopus/silk/tables.h | 6 +- media/libopus/silk/tuning_parameters.h | 2 +- media/libopus/silk/x86/NSQ_del_dec_sse.c | 857 ++++++++++++++++++ media/libopus/silk/x86/NSQ_sse.c | 720 +++++++++++++++ media/libopus/silk/x86/SigProc_FIX_sse.h | 94 ++ media/libopus/silk/x86/VAD_sse.c | 277 ++++++ media/libopus/silk/x86/VQ_WMat_EC_sse.c | 142 +++ media/libopus/silk/x86/main_sse.h | 276 ++++++ media/libopus/silk/x86/x86_silk_map.c | 174 ++++ media/libopus/sources.mozbuild | 44 +- media/libopus/src/analysis.c | 44 +- media/libopus/src/analysis.h | 17 +- media/libopus/src/mlp.c | 117 +-- media/libopus/src/mlp.h | 8 +- media/libopus/src/mlp_data.c | 4 + media/libopus/src/opus.c | 21 + media/libopus/src/opus_decoder.c | 91 +- media/libopus/src/opus_encoder.c | 101 ++- media/libopus/src/opus_multistream_decoder.c | 4 +- media/libopus/src/opus_multistream_encoder.c | 102 ++- media/libopus/src/opus_private.h | 19 +- media/libopus/src/repacketizer.c | 5 +- media/libopus/update.sh | 3 +- media/libvpx/vp8/common/postproc.c | 2 +- media/libyuv/source/compare_win.cc | 8 +- media/libyuv/source/rotate.cc | 4 +- media/libyuv/source/row_win.cc | 288 +++--- media/libyuv/source/scale_win.cc | 54 +- netwerk/base/BackgroundFileSaver.cpp | 6 + netwerk/base/nsSocketTransportService2.h | 5 +- netwerk/base/nsStreamTransportService.h | 5 +- netwerk/base/nsURLHelper.cpp | 2 +- netwerk/cache/nsDiskCacheDevice.cpp | 21 +- netwerk/cache2/CacheHashUtils.cpp | 25 +- netwerk/cache2/CacheIndex.cpp | 6 +- netwerk/cache2/CacheStorageService.cpp | 2 +- netwerk/cookie/nsCookieService.cpp | 9 +- netwerk/protocol/http/Http2Stream.cpp | 3 +- netwerk/protocol/http/SpdyStream31.cpp | 2 +- netwerk/protocol/http/nsHttpTransaction.cpp | 3 +- .../protocol/websocket/WebSocketChannel.cpp | 1 + .../converters/mozTXTToHTMLConv.cpp | 4 +- .../converters/nsHTTPCompressConv.cpp | 8 +- release/docker/beet-mover/Dockerfile | 19 + .../manager/ssl/nsCertOverrideService.cpp | 71 +- security/manager/ssl/nsClientAuthRemember.cpp | 21 +- security/manager/ssl/nsCryptoHash.cpp | 2 +- security/manager/ssl/nsIKeyModule.idl | 34 +- security/manager/ssl/nsIX509Cert.idl | 4 +- security/manager/ssl/nsKeyModule.cpp | 181 ++-- security/manager/ssl/nsKeyModule.h | 36 +- security/manager/ssl/nsNSSCertificate.cpp | 51 +- security/manager/ssl/nsNSSCertificate.h | 11 - security/manager/ssl/nsNSSCertificateDB.cpp | 91 +- .../ssl/nsNSSCertificateFakeTransport.cpp | 2 +- security/manager/ssl/nsNSSShutDown.h | 16 + .../manager/ssl/tests/unit/test_cert_dbKey.js | 152 ++++ .../ssl/tests/unit/test_weak_crypto.js | 43 +- security/manager/ssl/tests/unit/xpcshell.ini | 1 + .../configs/beetmover/en_us.yml.tmpl | 84 ++ .../configs/beetmover/partials.yml.tmpl | 16 + .../external_tools/extract_and_run_command.py | 205 +++++ .../mozharness/scripts/release/beet_mover.py | 343 +++++++ xpcom/threads/LazyIdleThread.h | 5 +- xpcom/threads/SharedThreadPool.h | 6 +- xpcom/threads/nsIEventTarget.idl | 7 +- xpcom/threads/nsIThread.idl | 2 +- xpcom/threads/nsIThreadInternal.idl | 2 +- xpcom/threads/nsIThreadPool.idl | 2 +- xpcom/threads/nsThread.h | 5 +- xpcom/threads/nsThreadPool.h | 5 +- 307 files changed, 15145 insertions(+), 3072 deletions(-) create mode 100644 js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js create mode 100644 js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js create mode 100644 js/src/jit-test/tests/debug/bug-1238610.js create mode 100644 js/src/jit-test/tests/debug/bug1240546.js create mode 100644 js/src/jit-test/tests/debug/bug1240803.js create mode 100644 js/src/jit-test/tests/debug/bug1242111.js create mode 100644 js/src/jit-test/tests/gc/bug-1240503.js create mode 100644 js/src/jit-test/tests/modules/global-scope.js create mode 100644 js/src/jit-test/tests/modules/off-thread-compile.js create mode 100644 js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js create mode 100644 js/src/tests/ecma_6/Array/sort_holes.js create mode 100644 js/src/tests/ecma_6/Function/arguments-iterator.js delete mode 100644 layout/reftests/bidi/83958-2c.html create mode 100644 media/libopus/celt/arm/celt_ne10_fft.c create mode 100644 media/libopus/celt/arm/celt_ne10_mdct.c create mode 100644 media/libopus/celt/arm/celt_neon_intr.c create mode 100644 media/libopus/celt/arm/fft_arm.h create mode 100644 media/libopus/celt/arm/mdct_arm.h create mode 100644 media/libopus/celt/mips/celt_mipsr1.h create mode 100644 media/libopus/celt/mips/fixed_generic_mipsr1.h create mode 100644 media/libopus/celt/mips/kiss_fft_mipsr1.h create mode 100644 media/libopus/celt/mips/mdct_mipsr1.h create mode 100644 media/libopus/celt/mips/pitch_mipsr1.h create mode 100644 media/libopus/celt/mips/vq_mipsr1.h create mode 100644 media/libopus/celt/static_modes_fixed_arm_ne10.h create mode 100644 media/libopus/celt/static_modes_float_arm_ne10.h create mode 100644 media/libopus/celt/x86/celt_lpc_sse.c create mode 100644 media/libopus/celt/x86/celt_lpc_sse.h create mode 100644 media/libopus/celt/x86/pitch_sse.c create mode 100644 media/libopus/celt/x86/pitch_sse2.c create mode 100644 media/libopus/celt/x86/pitch_sse4_1.c create mode 100644 media/libopus/celt/x86/x86_celt_map.c create mode 100644 media/libopus/celt/x86/x86cpu.c create mode 100644 media/libopus/celt/x86/x86cpu.h create mode 100644 media/libopus/nonunified.patch create mode 100644 media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h create mode 100644 media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h create mode 100644 media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h create mode 100644 media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c create mode 100644 media/libopus/silk/fixed/x86/prefilter_FIX_sse.c create mode 100644 media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c create mode 100644 media/libopus/silk/mips/NSQ_del_dec_mipsr1.h create mode 100644 media/libopus/silk/mips/macros_mipsr1.h create mode 100644 media/libopus/silk/mips/sigproc_fix_mipsr1.h create mode 100644 media/libopus/silk/x86/NSQ_del_dec_sse.c create mode 100644 media/libopus/silk/x86/NSQ_sse.c create mode 100644 media/libopus/silk/x86/SigProc_FIX_sse.h create mode 100644 media/libopus/silk/x86/VAD_sse.c create mode 100644 media/libopus/silk/x86/VQ_WMat_EC_sse.c create mode 100644 media/libopus/silk/x86/main_sse.h create mode 100644 media/libopus/silk/x86/x86_silk_map.c create mode 100644 release/docker/beet-mover/Dockerfile create mode 100644 security/manager/ssl/tests/unit/test_cert_dbKey.js create mode 100644 testing/mozharness/configs/beetmover/en_us.yml.tmpl create mode 100644 testing/mozharness/configs/beetmover/partials.yml.tmpl create mode 100644 testing/mozharness/external_tools/extract_and_run_command.py create mode 100644 testing/mozharness/scripts/release/beet_mover.py diff --git a/accessible/atk/AccessibleWrap.cpp b/accessible/atk/AccessibleWrap.cpp index 88c83d19d8..dd25b25d7b 100644 --- a/accessible/atk/AccessibleWrap.cpp +++ b/accessible/atk/AccessibleWrap.cpp @@ -1059,13 +1059,13 @@ GetAccessibleWrap(AtkObject* aAtkObj) NS_ENSURE_TRUE(isMAIObject || MAI_IS_ATK_SOCKET(aAtkObj), nullptr); - uintptr_t accWrapPtr = isMAIObject ? - MAI_ATK_OBJECT(aAtkObj)->accWrap.Bits() : - reinterpret_cast(MAI_ATK_SOCKET(aAtkObj)->accWrap); - if (accWrapPtr & IS_PROXY) - return nullptr; - - AccessibleWrap* accWrap = reinterpret_cast(accWrapPtr); + AccessibleWrap* accWrap = nullptr; + if (isMAIObject) { + Accessible* acc = MAI_ATK_OBJECT(aAtkObj)->accWrap.AsAccessible(); + accWrap = static_cast(acc); + } else { + accWrap = MAI_ATK_SOCKET(aAtkObj)->accWrap; + } // Check if the accessible was deconstructed. if (!accWrap) diff --git a/accessible/atk/moz.build b/accessible/atk/moz.build index 67a59e6536..dae3f86a60 100644 --- a/accessible/atk/moz.build +++ b/accessible/atk/moz.build @@ -52,6 +52,7 @@ if CONFIG['MOZ_ENABLE_DBUS']: include('/ipc/chromium/chromium-config.mozbuild') -if CONFIG['CLANG_CXX']: - # Suppress clang warning about unused function from gobject's RTTI macros. - CXXFLAGS += ['-Wno-unused-function'] +if CONFIG['CLANG_CXX'] or CONFIG['GNU_CXX']: + # Used in G_DEFINE_TYPE_EXTENDED macro, probably fixed in newer glib / + # gobject headers. See bug 1243331 comment 3. + CXXFLAGS += ['-Wno-unused-local-typedefs'] diff --git a/dom/animation/PseudoElementHashEntry.h b/dom/animation/PseudoElementHashEntry.h index c1f90e670c..b84f7c018d 100644 --- a/dom/animation/PseudoElementHashEntry.h +++ b/dom/animation/PseudoElementHashEntry.h @@ -49,6 +49,8 @@ public: return 0; // Convert the scoped enum into an integer while adding it to hash. + // Note: CSSPseudoElementTypeBase is uint8_t, so we convert it into + // uint8_t directly to avoid including the header. return mozilla::HashGeneric(aKey->mElement, static_cast(aKey->mPseudoType)); } diff --git a/dom/base/FeedWriterEnabled.h b/dom/base/FeedWriterEnabled.h index 52725bde3f..1bf39eba98 100644 --- a/dom/base/FeedWriterEnabled.h +++ b/dom/base/FeedWriterEnabled.h @@ -5,43 +5,14 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "js/TypeDecls.h" -#include "nsGlobalWindow.h" -#include "nsIPrincipal.h" -#include "nsIURI.h" -#include "nsString.h" -#include "xpcpublic.h" +#include "nsContentUtils.h" namespace mozilla { struct FeedWriterEnabled { static bool IsEnabled(JSContext* cx, JSObject* aGlobal) { - // Make sure the global is a window - nsGlobalWindow* win = xpc::WindowGlobalOrNull(aGlobal); - if (!win) { - return false; - } - - // Make sure that the principal is about:feeds. - nsCOMPtr principal = win->GetPrincipal(); - NS_ENSURE_TRUE(principal, false); - nsCOMPtr uri; - principal->GetURI(getter_AddRefs(uri)); - if (!uri) { - return false; - } - - // First check the scheme to avoid getting long specs in the common case. - bool isAbout = false; - uri->SchemeIs("about", &isAbout); - if (!isAbout) { - return false; - } - - // Now check the spec itself - nsAutoCString spec; - uri->GetSpec(spec); - return spec.EqualsLiteral("about:feeds"); + return nsContentUtils::IsSpecificAboutPage(aGlobal, "about:feeds"); } }; diff --git a/dom/base/ImageEncoder.cpp b/dom/base/ImageEncoder.cpp index bd8c5b18f1..5bbad65d68 100644 --- a/dom/base/ImageEncoder.cpp +++ b/dom/base/ImageEncoder.cpp @@ -5,7 +5,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "ImageEncoder.h" -#include "mozilla/ClearOnShutdown.h" #include "mozilla/dom/CanvasRenderingContext2D.h" #include "mozilla/gfx/2D.h" #include "mozilla/gfx/DataSurfaceHelpers.h" @@ -496,6 +495,38 @@ ImageEncoder::GetImageEncoder(nsAString& aType) return encoder.forget(); } +class EncoderThreadPoolTerminator final : public nsIObserver +{ + public: + NS_DECL_ISUPPORTS + + NS_IMETHODIMP Observe(nsISupports *, const char *topic, const char16_t *) override + { + NS_ASSERTION(!strcmp(topic, "xpcom-shutdown-threads"), + "Unexpected topic"); + if (ImageEncoder::sThreadPool) { + ImageEncoder::sThreadPool->Shutdown(); + ImageEncoder::sThreadPool = nullptr; + } + return NS_OK; + } + private: + ~EncoderThreadPoolTerminator() {} +}; + +NS_IMPL_ISUPPORTS(EncoderThreadPoolTerminator, nsIObserver) + +static void +RegisterEncoderThreadPoolTerminatorObserver() +{ + MOZ_ASSERT(NS_IsMainThread()); + nsCOMPtr os = mozilla::services::GetObserverService(); + NS_ASSERTION(os, "do_GetService failed"); + os->AddObserver(new EncoderThreadPoolTerminator(), + "xpcom-shutdown-threads", + false); +} + /* static */ nsresult ImageEncoder::EnsureThreadPool() @@ -503,12 +534,13 @@ ImageEncoder::EnsureThreadPool() if (!sThreadPool) { nsCOMPtr threadPool = do_CreateInstance(NS_THREADPOOL_CONTRACTID); sThreadPool = threadPool; + if (!NS_IsMainThread()) { NS_DispatchToMainThread(NS_NewRunnableFunction([]() -> void { - ClearOnShutdown(&sThreadPool); + RegisterEncoderThreadPoolTerminatorObserver(); })); } else { - ClearOnShutdown(&sThreadPool); + RegisterEncoderThreadPoolTerminatorObserver(); } const uint32_t kThreadLimit = 2; diff --git a/dom/base/ImageEncoder.h b/dom/base/ImageEncoder.h index de15902150..fd30a94d47 100644 --- a/dom/base/ImageEncoder.h +++ b/dom/base/ImageEncoder.h @@ -114,6 +114,7 @@ private: static StaticRefPtr sThreadPool; friend class EncodingRunnable; + friend class EncoderThreadPoolTerminator; }; /** diff --git a/dom/base/IndexedDBHelper.jsm b/dom/base/IndexedDBHelper.jsm index a3024c009a..9cfc631d13 100644 --- a/dom/base/IndexedDBHelper.jsm +++ b/dom/base/IndexedDBHelper.jsm @@ -4,8 +4,8 @@ "use strict"; -let DEBUG = 0; -let debug; +var DEBUG = 0; +var debug; if (DEBUG) { debug = function (s) { dump("-*- IndexedDBHelper: " + s + "\n"); } } else { @@ -24,13 +24,10 @@ Cu.importGlobalProperties(["indexedDB"]); XPCOMUtils.defineLazyModuleGetter(this, 'Services', 'resource://gre/modules/Services.jsm'); -this.IndexedDBHelper = function IndexedDBHelper() {} +this.IndexedDBHelper = function IndexedDBHelper() { +} IndexedDBHelper.prototype = { - - // Cache the database - _db: null, - // Close the database close: function close() { if (this._db) { @@ -48,8 +45,22 @@ IndexedDBHelper.prototype = { * @param failureCb * Error callback to call when an error is encountered. */ - open: function open(aSuccessCb, aFailureCb) { + open: function open(aCallback) { + if (aCallback && !this._waitForOpenCallbacks.has(aCallback)) { + this._waitForOpenCallbacks.add(aCallback); + if (this._waitForOpenCallbacks.size !== 1) { + return; + } + } + let self = this; + let invokeCallbacks = err => { + for (let callback of self._waitForOpenCallbacks) { + callback(err); + } + self._waitForOpenCallbacks.clear(); + }; + if (DEBUG) debug("Try to open database:" + self.dbName + " " + self.dbVersion); let req = indexedDB.open(this.dbName, this.dbVersion); req.onsuccess = function (event) { @@ -58,7 +69,7 @@ IndexedDBHelper.prototype = { self._db.onversionchange = function(event) { if (DEBUG) debug("WARNING: DB modified from a different window."); } - aSuccessCb && aSuccessCb(); + invokeCallbacks(); }; req.onupgradeneeded = function (aEvent) { @@ -72,7 +83,7 @@ IndexedDBHelper.prototype = { }; req.onerror = function (aEvent) { if (DEBUG) debug("Failed to open database: " + self.dbName); - aFailureCb && aFailureCb(aEvent.target.error.name); + invokeCallbacks(aEvent.target.error.name); }; req.onblocked = function (aEvent) { if (DEBUG) debug("Opening database request is blocked."); @@ -96,7 +107,13 @@ IndexedDBHelper.prototype = { } return; } - this.open(aSuccessCb, aFailureCb); + this.open(aError => { + if (aError) { + aFailureCb && aFailureCb(aError); + } else { + aSuccessCb && aSuccessCb(); + } + }); }, /** @@ -169,5 +186,8 @@ IndexedDBHelper.prototype = { this.dbName = aDBName; this.dbVersion = aDBVersion; this.dbStoreNames = aDBStoreNames; + // Cache the database. + this._db = null; + this._waitForOpenCallbacks = new Set(); } } diff --git a/dom/base/ShadowRoot.cpp b/dom/base/ShadowRoot.cpp index 968602f6de..ed60e55ae2 100644 --- a/dom/base/ShadowRoot.cpp +++ b/dom/base/ShadowRoot.cpp @@ -219,7 +219,7 @@ ShadowRoot::RemoveFromIdTable(Element* aElement, nsIAtom* aId) if (entry) { entry->RemoveIdElement(aElement); if (entry->IsEmpty()) { - mIdentifierMap.RawRemoveEntry(entry); + mIdentifierMap.RemoveEntry(entry); } } } diff --git a/dom/base/WebSocket.cpp b/dom/base/WebSocket.cpp index 8b2260fb35..45268ff7bc 100644 --- a/dom/base/WebSocket.cpp +++ b/dom/base/WebSocket.cpp @@ -81,10 +81,7 @@ public: NS_DECL_NSIREQUEST NS_DECL_THREADSAFE_ISUPPORTS NS_DECL_NSIEVENTTARGET - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; explicit WebSocketImpl(WebSocket* aWebSocket) : mWebSocket(aWebSocket) diff --git a/dom/base/nsContentUtils.cpp b/dom/base/nsContentUtils.cpp index 5b70906ac5..e9b1154144 100644 --- a/dom/base/nsContentUtils.cpp +++ b/dom/base/nsContentUtils.cpp @@ -8920,3 +8920,37 @@ nsContentUtils::SerializeNodeToMarkup(nsINode* aRoot, } } } + +bool +nsContentUtils::IsSpecificAboutPage(JSObject* aGlobal, const char* aUri) +{ + // aUri must start with about: or this isn't the right function to be using. + MOZ_ASSERT(strncmp(aUri, "about:", 6) == 0); + + // Make sure the global is a window + nsGlobalWindow* win = xpc::WindowGlobalOrNull(aGlobal); + if (!win) { + return false; + } + + // Make sure that the principal is about:feeds. + nsCOMPtr principal = win->GetPrincipal(); + NS_ENSURE_TRUE(principal, false); + nsCOMPtr uri; + principal->GetURI(getter_AddRefs(uri)); + if (!uri) { + return false; + } + + // First check the scheme to avoid getting long specs in the common case. + bool isAbout = false; + uri->SchemeIs("about", &isAbout); + if (!isAbout) { + return false; + } + + // Now check the spec itself + nsAutoCString spec; + uri->GetSpec(spec); + return spec.EqualsASCII(aUri); +} diff --git a/dom/base/nsContentUtils.h b/dom/base/nsContentUtils.h index 39b801a9e5..36618d264f 100644 --- a/dom/base/nsContentUtils.h +++ b/dom/base/nsContentUtils.h @@ -2600,6 +2600,14 @@ public: bool aDescendentsOnly, nsAString& aOut); + /* + * Returns true iff the provided JSObject is a global, and its URI matches + * the provided about: URI. + * @param aGlobal the JSObject whose URI to check, if it is a global. + * @param aUri the URI to match, e.g. "about:feeds" + */ + static bool IsSpecificAboutPage(JSObject* aGlobal, const char* aUri); + private: static bool InitializeEventTable(); diff --git a/dom/base/nsDOMWindowUtils.cpp b/dom/base/nsDOMWindowUtils.cpp index 7408c31b3b..8f9a070b4c 100644 --- a/dom/base/nsDOMWindowUtils.cpp +++ b/dom/base/nsDOMWindowUtils.cpp @@ -1821,16 +1821,6 @@ nsDOMWindowUtils::GetFocusedInputType(char** aType) return NS_OK; } -NS_IMETHODIMP -nsDOMWindowUtils::FindElementWithViewId(nsViewID aID, - nsIDOMElement** aResult) -{ - MOZ_RELEASE_ASSERT(nsContentUtils::IsCallerChrome()); - - RefPtr content = nsLayoutUtils::FindContentFor(aID); - return content ? CallQueryInterface(content, aResult) : NS_OK; -} - NS_IMETHODIMP nsDOMWindowUtils::GetViewId(nsIDOMElement* aElement, nsViewID* aResult) { @@ -2623,8 +2613,6 @@ nsDOMWindowUtils::ComputeAnimationDistance(nsIDOMElement* aElement, const nsAString& aValue2, double* aResult) { - MOZ_RELEASE_ASSERT(nsContentUtils::LegacyIsCallerChromeOrNativeCode()); - nsresult rv; nsCOMPtr content = do_QueryInterface(aElement, &rv); NS_ENSURE_SUCCESS(rv, rv); diff --git a/dom/base/nsDocument.cpp b/dom/base/nsDocument.cpp index 745adb7fe5..0fe5204fe2 100644 --- a/dom/base/nsDocument.cpp +++ b/dom/base/nsDocument.cpp @@ -3052,7 +3052,7 @@ nsDocument::RemoveFromIdTable(Element *aElement, nsIAtom* aId) ++mExpandoAndGeneration.generation; } if (entry->IsEmpty()) { - mIdentifierMap.RawRemoveEntry(entry); + mIdentifierMap.RemoveEntry(entry); } } diff --git a/dom/bindings/test/test_exception_options_from_jsimplemented.html b/dom/bindings/test/test_exception_options_from_jsimplemented.html index 32bf6ca31c..8a98a8fb63 100644 --- a/dom/bindings/test/test_exception_options_from_jsimplemented.html +++ b/dom/bindings/test/test_exception_options_from_jsimplemented.html @@ -18,9 +18,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=1107592 var file = location.href; var asyncFrame; /* Async parent frames from pushPrefEnv don't show up in e10s. */ - var isE10S = !SpecialPowers.Services.wm.getMostRecentWindow("navigator:browser"); + var isE10S = !SpecialPowers.isMainProcess(); if (!isE10S && SpecialPowers.getBoolPref("javascript.options.asyncstack")) { - asyncFrame = `Async*@${file}:153:1 + asyncFrame = `Async*@${file}:153:3 `; } else { asyncFrame = ""; diff --git a/dom/bindings/test/test_promise_rejections_from_jsimplemented.html b/dom/bindings/test/test_promise_rejections_from_jsimplemented.html index 6c9b5f22b2..1879224742 100644 --- a/dom/bindings/test/test_promise_rejections_from_jsimplemented.html +++ b/dom/bindings/test/test_promise_rejections_from_jsimplemented.html @@ -38,10 +38,10 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=1107592 function doTest() { var t = new TestInterfaceJS(); /* Async parent frames from pushPrefEnv don't show up in e10s. */ - var isE10S = !SpecialPowers.Services.wm.getMostRecentWindow("navigator:browser"); + var isE10S = !SpecialPowers.isMainProcess(); var asyncStack = SpecialPowers.getBoolPref("javascript.options.asyncstack"); var ourFile = location.href; - var parentFrame = (asyncStack && !isE10S) ? `Async*@${ourFile}:121:1 + var parentFrame = (asyncStack && !isE10S) ? `Async*@${ourFile}:121:3 ` : ""; Promise.all([ diff --git a/dom/interfaces/base/nsIDOMWindowUtils.idl b/dom/interfaces/base/nsIDOMWindowUtils.idl index 2a5858cab1..d86c748cbc 100644 --- a/dom/interfaces/base/nsIDOMWindowUtils.idl +++ b/dom/interfaces/base/nsIDOMWindowUtils.idl @@ -1480,13 +1480,6 @@ interface nsIDOMWindowUtils : nsISupports { */ readonly attribute string focusedInputType; - /** - * Given a view ID from the compositor process, retrieve the element - * associated with a view. For scrollpanes for documents, the root - * element of the document is returned. - */ - nsIDOMElement findElementWithViewId(in nsViewID aId); - /** * Find the view ID for a given element. This is the reverse of * findElementWithViewId(). diff --git a/dom/media/platforms/wmf/WMFDecoderModule.cpp b/dom/media/platforms/wmf/WMFDecoderModule.cpp index 875e206162..5914fde3ab 100644 --- a/dom/media/platforms/wmf/WMFDecoderModule.cpp +++ b/dom/media/platforms/wmf/WMFDecoderModule.cpp @@ -163,17 +163,29 @@ CanCreateWMFDecoder() return result.value(); } +/* static */ bool +WMFDecoderModule::HasH264() +{ + return CanCreateWMFDecoder(); +} + +/* static */ bool +WMFDecoderModule::HasAAC() +{ + return CanCreateWMFDecoder(); +} + bool WMFDecoderModule::SupportsMimeType(const nsACString& aMimeType) const { if ((aMimeType.EqualsLiteral("audio/mp4a-latm") || aMimeType.EqualsLiteral("audio/mp4")) && - CanCreateWMFDecoder()) { + WMFDecoderModule::HasAAC()) { return true; } if ((aMimeType.EqualsLiteral("video/avc") || aMimeType.EqualsLiteral("video/mp4")) && - CanCreateWMFDecoder()) { + WMFDecoderModule::HasH264()) { return true; } if (aMimeType.EqualsLiteral("audio/mpeg") && diff --git a/dom/media/platforms/wmf/WMFDecoderModule.h b/dom/media/platforms/wmf/WMFDecoderModule.h index a959e26091..67ec7ac6de 100644 --- a/dom/media/platforms/wmf/WMFDecoderModule.h +++ b/dom/media/platforms/wmf/WMFDecoderModule.h @@ -42,6 +42,15 @@ public: // Called from any thread, must call init first static int GetNumDecoderThreads(); static bool LowLatencyMFTEnabled(); + + // Accessors that report whether we have the required MFTs available + // on the system to play various codecs. Windows Vista doesn't have the + // H.264/AAC decoders if the "Platform Update Supplement for Windows Vista" + // is not installed, and Window N and KN variants also require a "Media + // Feature Pack" to be installed. Windows XP doesn't have WMF. + static bool HasAAC(); + static bool HasH264(); + private: bool mWMFInitialized; }; diff --git a/dom/wifi/WifiCertService.cpp b/dom/wifi/WifiCertService.cpp index d4fb8e04b4..f1508d25b7 100644 --- a/dom/wifi/WifiCertService.cpp +++ b/dom/wifi/WifiCertService.cpp @@ -439,6 +439,12 @@ WifiCertService::WifiCertService() WifiCertService::~WifiCertService() { MOZ_ASSERT(!gWifiCertService); + + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return; + } + shutdown(calledFromObject); } already_AddRefed diff --git a/dom/xul/XULDocument.cpp b/dom/xul/XULDocument.cpp index 14a3972d8c..3b5027d667 100644 --- a/dom/xul/XULDocument.cpp +++ b/dom/xul/XULDocument.cpp @@ -1831,7 +1831,7 @@ XULDocument::RemoveElementFromRefMap(Element* aElement) if (!entry) return; if (entry->RemoveElement(aElement)) { - mRefMap.RawRemoveEntry(entry); + mRefMap.RemoveEntry(entry); } } } diff --git a/extensions/cookie/nsPermissionManager.cpp b/extensions/cookie/nsPermissionManager.cpp index cd51955ebe..c686011390 100644 --- a/extensions/cookie/nsPermissionManager.cpp +++ b/extensions/cookie/nsPermissionManager.cpp @@ -794,7 +794,7 @@ nsPermissionManager::AddInternal(nsIPrincipal* aPrincipal, PermissionHashKey* entry = mPermissionTable.PutEntry(key); if (!entry) return NS_ERROR_FAILURE; if (!entry->GetKey()) { - mPermissionTable.RawRemoveEntry(entry); + mPermissionTable.RemoveEntry(entry); return NS_ERROR_OUT_OF_MEMORY; } @@ -929,7 +929,7 @@ nsPermissionManager::AddInternal(nsIPrincipal* aPrincipal, // If there are no more permissions stored for that entry, clear it. if (entry->GetPermissions().IsEmpty()) { - mPermissionTable.RawRemoveEntry(entry); + mPermissionTable.RemoveEntry(entry); } break; diff --git a/gfx/thebes/gfxFontconfigUtils.cpp b/gfx/thebes/gfxFontconfigUtils.cpp index 867f60747e..28f22db2bb 100644 --- a/gfx/thebes/gfxFontconfigUtils.cpp +++ b/gfx/thebes/gfxFontconfigUtils.cpp @@ -617,13 +617,13 @@ gfxFontconfigUtils::UpdateFontListInternal(bool aForce) bool added = entry->AddFont(font); if (!entry->mKey) { - // The reference to the font pattern keeps the pointer to - // string for the key valid. If adding the font failed - // then the entry must be removed. + // The reference to the font pattern keeps the pointer + // to string for the key valid. If adding the font + // failed then the entry must be removed. if (added) { entry->mKey = family; } else { - mFontsByFamily.RawRemoveEntry(entry); + mFontsByFamily.RemoveEntry(entry); } } } diff --git a/ipc/chromium/src/build/build_config.h b/ipc/chromium/src/build/build_config.h index 4208f727b5..7a4938e06c 100644 --- a/ipc/chromium/src/build/build_config.h +++ b/ipc/chromium/src/build/build_config.h @@ -81,10 +81,7 @@ #elif defined(__ppc__) || defined(__powerpc__) #define ARCH_CPU_PPC 1 #define ARCH_CPU_32_BITS 1 -#elif defined(__m68k__) -#define ARCH_CPU_M68K 1 -#define ARCH_CPU_32_BITS 1 -#elif defined(__sparc__) && defined(__arch64__) +#elif defined(__sparc64__) #define ARCH_CPU_SPARC 1 #define ARCH_CPU_64_BITS 1 #elif defined(__sparc__) diff --git a/js/public/MemoryMetrics.h b/js/public/MemoryMetrics.h index 5157dfa613..a552d5e524 100644 --- a/js/public/MemoryMetrics.h +++ b/js/public/MemoryMetrics.h @@ -166,10 +166,11 @@ struct ClassInfo #define FOR_EACH_SIZE(macro) \ macro(Objects, GCHeapUsed, objectsGCHeap) \ macro(Objects, MallocHeap, objectsMallocHeapSlots) \ - macro(Objects, MallocHeap, objectsMallocHeapElementsNonAsmJS) \ + macro(Objects, MallocHeap, objectsMallocHeapElementsNormal) \ macro(Objects, MallocHeap, objectsMallocHeapElementsAsmJS) \ + macro(Objects, NonHeap, objectsNonHeapElementsNormal) \ macro(Objects, NonHeap, objectsNonHeapElementsAsmJS) \ - macro(Objects, NonHeap, objectsNonHeapElementsMapped) \ + macro(Objects, NonHeap, objectsNonHeapElementsShared) \ macro(Objects, NonHeap, objectsNonHeapCodeAsmJS) \ macro(Objects, MallocHeap, objectsMallocHeapMisc) \ \ diff --git a/js/src/builtin/ModuleObject.cpp b/js/src/builtin/ModuleObject.cpp index f4998cde0e..52554e7b15 100644 --- a/js/src/builtin/ModuleObject.cpp +++ b/js/src/builtin/ModuleObject.cpp @@ -109,7 +109,7 @@ GlobalObject::initImportEntryProto(JSContext* cx, Handle global) } /* static */ ImportEntryObject* -ImportEntryObject::create(JSContext* cx, +ImportEntryObject::create(ExclusiveContext* cx, HandleAtom moduleRequest, HandleAtom importName, HandleAtom localName) @@ -181,7 +181,7 @@ StringOrNullValue(JSString* maybeString) } /* static */ ExportEntryObject* -ExportEntryObject::create(JSContext* cx, +ExportEntryObject::create(ExclusiveContext* cx, HandleAtom maybeExportName, HandleAtom maybeModuleRequest, HandleAtom maybeImportName, @@ -691,6 +691,62 @@ ModuleObject::initImportExportData(HandleArrayObject requestedModules, initReservedSlot(StarExportEntriesSlot, ObjectValue(*starExportEntries)); } +static bool +FreezeObjectProperty(JSContext* cx, HandleNativeObject obj, uint32_t slot) +{ + RootedObject property(cx, &obj->getSlot(slot).toObject()); + return FreezeObject(cx, property); +} + +/* static */ bool +ModuleObject::FreezeArrayProperties(JSContext* cx, HandleModuleObject self) +{ + return FreezeObjectProperty(cx, self, RequestedModulesSlot) && + FreezeObjectProperty(cx, self, ImportEntriesSlot) && + FreezeObjectProperty(cx, self, LocalExportEntriesSlot) && + FreezeObjectProperty(cx, self, IndirectExportEntriesSlot) && + FreezeObjectProperty(cx, self, StarExportEntriesSlot); +} + +static inline void +AssertObjectPropertyFrozen(JSContext* cx, HandleNativeObject obj, uint32_t slot) +{ +#ifdef DEBUG + bool frozen = false; + RootedObject property(cx, &obj->getSlot(slot).toObject()); + MOZ_ALWAYS_TRUE(TestIntegrityLevel(cx, property, IntegrityLevel::Frozen, &frozen)); + MOZ_ASSERT(frozen); +#endif +} + +/* static */ inline void +ModuleObject::AssertArrayPropertiesFrozen(JSContext* cx, HandleModuleObject self) +{ + AssertObjectPropertyFrozen(cx, self, RequestedModulesSlot); + AssertObjectPropertyFrozen(cx, self, ImportEntriesSlot); + AssertObjectPropertyFrozen(cx, self, LocalExportEntriesSlot); + AssertObjectPropertyFrozen(cx, self, IndirectExportEntriesSlot); + AssertObjectPropertyFrozen(cx, self, StarExportEntriesSlot); +} + +inline static void +AssertModuleScopesMatch(ModuleObject* module) +{ + MOZ_ASSERT(IsStaticGlobalLexicalScope(module->enclosingStaticScope())); + MOZ_ASSERT(module->initialEnvironment().enclosingScope().as().staticScope() == + module->enclosingStaticScope()); +} + +void +ModuleObject::fixScopesAfterCompartmentMerge(JSContext* cx) +{ + AssertModuleScopesMatch(this); + Rooted lexicalScope(cx, &script()->global().lexicalScope()); + setReservedSlot(StaticScopeSlot, ObjectValue(lexicalScope->staticBlock())); + initialEnvironment().setEnclosingScope(lexicalScope); + AssertModuleScopesMatch(this); +} + bool ModuleObject::hasScript() const { @@ -762,6 +818,8 @@ ModuleObject::noteFunctionDeclaration(ExclusiveContext* cx, HandleAtom name, Han /* static */ bool ModuleObject::instantiateFunctionDeclarations(JSContext* cx, HandleModuleObject self) { + AssertArrayPropertiesFrozen(cx, self); + FunctionDeclarationVector* funDecls = self->functionDeclarations(); if (!funDecls) { JS_ReportError(cx, "Module function declarations have already been instantiated"); @@ -798,6 +856,8 @@ ModuleObject::setEvaluated() /* static */ bool ModuleObject::evaluate(JSContext* cx, HandleModuleObject self, MutableHandleValue rval) { + AssertArrayPropertiesFrozen(cx, self); + RootedScript script(cx, self->script()); RootedModuleEnvironmentObject scope(cx, self->environment()); if (!scope) { @@ -887,7 +947,7 @@ js::InitModuleClasses(JSContext* cx, HandleObject obj) /////////////////////////////////////////////////////////////////////////// // ModuleBuilder -ModuleBuilder::ModuleBuilder(JSContext* cx, HandleModuleObject module) +ModuleBuilder::ModuleBuilder(ExclusiveContext* cx, HandleModuleObject module) : cx_(cx), module_(cx, module), requestedModules_(cx, AtomVector(cx)), @@ -1170,8 +1230,6 @@ ArrayObject* ModuleBuilder::createArray(const GCVector& vector) array->setDenseInitializedLength(length); for (uint32_t i = 0; i < length; i++) array->initDenseElement(i, MakeElementValue(vector[i])); - if (!JS_FreezeObject(cx_, array)) - return nullptr; return array; } diff --git a/js/src/builtin/ModuleObject.h b/js/src/builtin/ModuleObject.h index 973e026a5b..cca0e7b34b 100644 --- a/js/src/builtin/ModuleObject.h +++ b/js/src/builtin/ModuleObject.h @@ -43,9 +43,9 @@ class ImportEntryObject : public NativeObject }; static const Class class_; - static JSObject* initClass(JSContext* cx, HandleObject obj); + static JSObject* initClass(ExclusiveContext* cx, HandleObject obj); static bool isInstance(HandleValue value); - static ImportEntryObject* create(JSContext* cx, + static ImportEntryObject* create(ExclusiveContext* cx, HandleAtom moduleRequest, HandleAtom importName, HandleAtom localName); @@ -70,9 +70,9 @@ class ExportEntryObject : public NativeObject }; static const Class class_; - static JSObject* initClass(JSContext* cx, HandleObject obj); + static JSObject* initClass(ExclusiveContext* cx, HandleObject obj); static bool isInstance(HandleValue value); - static ExportEntryObject* create(JSContext* cx, + static ExportEntryObject* create(ExclusiveContext* cx, HandleAtom maybeExportName, HandleAtom maybeModuleRequest, HandleAtom maybeImportName, @@ -232,6 +232,9 @@ class ModuleObject : public NativeObject HandleArrayObject localExportEntries, HandleArrayObject indiretExportEntries, HandleArrayObject starExportEntries); + static bool FreezeArrayProperties(JSContext* cx, HandleModuleObject self); + static void AssertArrayPropertiesFrozen(JSContext* cx, HandleModuleObject self); + void fixScopesAfterCompartmentMerge(JSContext* cx); JSScript* script() const; JSObject* enclosingStaticScope() const; @@ -273,7 +276,7 @@ class ModuleObject : public NativeObject class MOZ_STACK_CLASS ModuleBuilder { public: - explicit ModuleBuilder(JSContext* cx, HandleModuleObject module); + explicit ModuleBuilder(ExclusiveContext* cx, HandleModuleObject module); bool processImport(frontend::ParseNode* pn); bool processExport(frontend::ParseNode* pn); @@ -296,7 +299,7 @@ class MOZ_STACK_CLASS ModuleBuilder using RootedImportEntryVector = JS::Rooted; using RootedExportEntryVector = JS::Rooted; - JSContext* cx_; + ExclusiveContext* cx_; RootedModuleObject module_; RootedAtomVector requestedModules_; RootedAtomVector importedBoundNames_; diff --git a/js/src/builtin/Sorting.js b/js/src/builtin/Sorting.js index ade8fe1954..c35a399f0a 100644 --- a/js/src/builtin/Sorting.js +++ b/js/src/builtin/Sorting.js @@ -6,12 +6,158 @@ // consolidated here to avoid confusion and re-implementation of existing // algorithms. +// For sorting values with limited range; uint8 and int8. +function CountingSort(array, len, signed) { + var buffer = new List(); + var min = 0; + + // Map int8 values onto the uint8 range when storing in buffer. + if (signed) { + min = -128; + } + + for (var i = 0; i < 256; i++) { + buffer[i] = 0; + } + + // Populate the buffer + for (var i = 0; i < len; i++) { + var val = array[i]; + buffer[val - min]++ + } + + // Traverse the buffer in order and write back elements to array + var val = 0; + for (var i = 0; i < len; i++) { + // Invariant: sum(buffer[val:]) == len-i + while (true) { + if (buffer[val] > 0) { + array[i] = val + min; + buffer[val]--; + break; + } else { + val++; + } + } + } + return array; +} + +// Helper for RadixSort +function ByteAtCol(x, pos) { + return (x >> (pos * 8)) & 0xFF; +} + +function SortByColumn(array, len, aux, col) { + const R = 256; + let counts = new List(); + + // |counts| is used to compute the starting index position for each key. + // Letting counts[0] always be 0, simplifies the transform step below. + // Example: + // + // Computing frequency counts for the input [1 2 1] gives: + // 0 1 2 3 ... (keys) + // 0 0 2 1 (frequencies) + // + // Transforming frequencies to indexes gives: + // 0 1 2 3 ... (keys) + // 0 0 2 3 (indexes) + for (let r = 0; r < R + 1; r++) { + counts[r] = 0; + } + // Compute frequency counts + for (let i = 0; i < len; i++) { + let val = array[i]; + let b = ByteAtCol(val, col); + counts[b + 1]++; + } + + // Transform counts to indices. + for (let r = 0; r < R; r++) { + counts[r+1] += counts[r]; + } + + // Distribute + for (let i = 0; i < len; i++) { + let val = array[i]; + let b = ByteAtCol(val, col); + aux[counts[b]++] = val; + } + + // Copy back + for (let i = 0; i < len; i++) { + array[i] = aux[i]; + } +} + +// Sorts integers and float32. |signed| is true for int16 and int32, |floating| +// is true for float32. +function RadixSort(array, len, nbytes, signed, floating, comparefn) { + + // Determined by performance testing. + if (len < 128) { + QuickSort(array, len, comparefn); + return array; + } + + let aux = new List(); + for (let i = 0; i < len; i++) { + aux[i] = 0; + } + + let view = array; + let signMask = 1 << nbytes * 8 - 1; + + // Preprocess + if (floating) { + view = new Int32Array(array.buffer); + + // Flip sign bit for positive numbers; flip all bits for negative + // numbers + for (let i = 0; i < len; i++) { + if (view[i] & signMask) { + view[i] ^= 0xFFFFFFFF; + } else { + view[i] ^= signMask + } + } + } else if (signed) { + // Flip sign bit + for (let i = 0; i < len; i++) { + view[i] ^= signMask + } + } + + // Sort + for (let col = 0; col < nbytes; col++) { + SortByColumn(view, len, aux, col); + } + + // Restore original bit representation + if (floating) { + for (let i = 0; i < len; i++) { + if (view[i] & signMask) { + view[i] ^= signMask; + } else { + view[i] ^= 0xFFFFFFFF; + } + } + } else if (signed) { + for (let i = 0; i < len; i++) { + view[i] ^= signMask + } + } + return array; +} + + // For sorting small arrays. function InsertionSort(array, from, to, comparefn) { - var item, swap; - for (var i = from + 1; i <= to; i++) { + let item, swap, i, j; + for (i = from + 1; i <= to; i++) { item = array[i]; - for (var j = i - 1; j >= from; j--) { + for (j = i - 1; j >= from; j--) { swap = array[j]; if (comparefn(swap, item) <= 0) break; @@ -28,28 +174,29 @@ function SwapArrayElements(array, i, j) { } // A helper function for MergeSort. -function Merge(array, start, mid, end, lBuffer, rBuffer, comparefn) { +function Merge(list, start, mid, end, lBuffer, rBuffer, comparefn) { var i, j, k; var sizeLeft = mid - start + 1; var sizeRight = end - mid; - // Copy our virtual arrays into separate buffers. + // Copy our virtual lists into separate buffers. for (i = 0; i < sizeLeft; i++) - lBuffer[i] = array[start + i]; + lBuffer[i] = list[start + i]; for (j = 0; j < sizeRight; j++) - rBuffer[j] = array[mid + 1 + j]; + rBuffer[j] = list[mid + 1 + j]; + i = 0; j = 0; k = start; while (i < sizeLeft && j < sizeRight) { if (comparefn(lBuffer[i], rBuffer[j]) <= 0) { - array[k] = lBuffer[i]; + list[k] = lBuffer[i]; i++; } else { - array[k] = rBuffer[j]; + list[k] = rBuffer[j]; j++; } k++; @@ -57,46 +204,70 @@ function Merge(array, start, mid, end, lBuffer, rBuffer, comparefn) { // Empty out any remaining elements in the buffer. while (i < sizeLeft) { - array[k] = lBuffer[i]; + list[k] =lBuffer[i]; i++; k++; } while (j < sizeRight) { - array[k] = rBuffer[j]; + list[k] =rBuffer[j]; j++; k++; } } +// Helper function for overwriting a sparse array with a +// dense array, filling remaining slots with holes. +function MoveHoles(sparse, sparseLen, dense, denseLen) { + for (var i = 0; i < denseLen; i++) + _DefineDataProperty(sparse, i, dense[i]); + for (var j = denseLen; j < sparseLen; j++) + delete sparse[j]; +} + // Iterative, bottom up, mergesort. function MergeSort(array, len, comparefn) { + // To save effort we will do all of our work on a dense list, + // then create holes at the end. + var denseList = new List(); + var denseLen = 0; + + for (var i = 0; i < len; i++) { + if (i in array) + denseList[denseLen++] = array[i]; + } + + if (denseLen < 1) + return array; + // Insertion sort for small arrays, where "small" is defined by performance // testing. if (len < 24) { - InsertionSort(array, 0, len - 1, comparefn); - return array; + InsertionSort(denseList, 0, denseLen - 1, comparefn); + MoveHoles(array, len, denseList, denseLen); + return array; } // We do all of our allocating up front var lBuffer = new List(); var rBuffer = new List(); - var mid, end, endOne, endTwo; - for (var windowSize = 1; windowSize < len; windowSize = 2*windowSize) { - for (var start = 0; start < len - 1; start += 2*windowSize) { - assert(windowSize < len, "The window size is larger than the array length!"); + var mid, end, endOne, endTwo; + for (var windowSize = 1; windowSize < denseLen; windowSize = 2 * windowSize) { + for (var start = 0; start < denseLen - 1; start += 2 * windowSize) { + assert(windowSize < denseLen, "The window size is larger than the array denseLength!"); // The midpoint between the two subarrays. mid = start + windowSize - 1; // To keep from going over the edge. end = start + 2 * windowSize - 1; - end = end < len - 1 ? end : len - 1; + end = end < denseLen - 1 ? end : denseLen - 1; // Skip lopsided runs to avoid doing useless work if (mid > end) continue; - Merge(array, start, mid, end, lBuffer, rBuffer, comparefn); + Merge(denseList, start, mid, end, lBuffer, rBuffer, comparefn); } } + MoveHoles(array, len, denseList, denseLen); return array; } diff --git a/js/src/builtin/TypedArray.js b/js/src/builtin/TypedArray.js index 3966b1ad40..143aeb5b21 100644 --- a/js/src/builtin/TypedArray.js +++ b/js/src/builtin/TypedArray.js @@ -988,7 +988,22 @@ function TypedArraySort(comparefn) { if (comparefn === undefined) { comparefn = TypedArrayCompare; - } else { + // CountingSort doesn't invoke the comparefn + if (IsUint8TypedArray(obj)) { + return CountingSort(obj, len, false /* signed */); + } else if (IsInt8TypedArray(obj)) { + return CountingSort(obj, len, true /* signed */); + } else if (IsUint16TypedArray(obj)) { + return RadixSort(obj, len, 2 /* nbytes */, false /* signed */, false /* floating */, comparefn); + } else if (IsInt16TypedArray(obj)) { + return RadixSort(obj, len, 2 /* nbytes */, true /* signed */, false /* floating */, comparefn); + } else if (IsUint32TypedArray(obj)) { + return RadixSort(obj, len, 4 /* nbytes */, false /* signed */, false /* floating */, comparefn); + } else if (IsInt32TypedArray(obj)) { + return RadixSort(obj, len, 4 /* nbytes */, true /* signed */, false /* floating */, comparefn); + } else if (IsFloat32TypedArray(obj)) { + return RadixSort(obj, len, 4 /* nbytes */, true /* signed */, true /* floating */, comparefn); + } // To satisfy step 2 from TypedArray SortCompare described in 22.2.3.26 // the user supplied comparefn is wrapped. var wrappedCompareFn = comparefn; diff --git a/js/src/doc/Debugger/Debugger.md b/js/src/doc/Debugger/Debugger.md index 8048fcd8fd..df275002fc 100644 --- a/js/src/doc/Debugger/Debugger.md +++ b/js/src/doc/Debugger/Debugger.md @@ -536,3 +536,13 @@ other kinds of objects. `TypeError`. Determine which global is designated by global using the same rules as [`Debugger.prototype.addDebuggee`][add]. +## Static methods of the Debugger Object + +The functions described below are not called with a `this` value. + +isCompilableUnit(source) +: Given a string of source code, designated by source, return false if + the string might become a valid JavaScript statement with the addition of + more lines. Otherwise return true. The intent is to support interactive + compilation - accumulate lines in a buffer until isCompilableUnit is true, + then pass it to the compiler. diff --git a/js/src/frontend/BytecodeCompiler.cpp b/js/src/frontend/BytecodeCompiler.cpp index 3ea9abe339..eade3ffa7c 100644 --- a/js/src/frontend/BytecodeCompiler.cpp +++ b/js/src/frontend/BytecodeCompiler.cpp @@ -576,7 +576,7 @@ ModuleObject* BytecodeCompiler::compileModule() module->init(script); - ModuleBuilder builder(cx->asJSContext(), module); + ModuleBuilder builder(cx, module); ParseNode* pn = parser->standaloneModule(module, builder); if (!pn) return nullptr; @@ -759,20 +759,32 @@ frontend::CompileScript(ExclusiveContext* cx, LifoAlloc* alloc, HandleObject sco } ModuleObject* -frontend::CompileModule(JSContext* cx, HandleObject obj, - const ReadOnlyCompileOptions& optionsInput, - SourceBufferHolder& srcBuf) +frontend::CompileModule(ExclusiveContext* cx, const ReadOnlyCompileOptions& optionsInput, + SourceBufferHolder& srcBuf, LifoAlloc* alloc) { MOZ_ASSERT(srcBuf.get()); + MOZ_ASSERT(cx->isJSContext() == (alloc == nullptr)); + + if (!alloc) + alloc = &cx->asJSContext()->tempLifoAlloc(); CompileOptions options(cx, optionsInput); options.maybeMakeStrictMode(true); // ES6 10.2.1 Module code is always strict mode code. options.setIsRunOnce(true); Rooted staticScope(cx, &cx->global()->lexicalScope().staticBlock()); - BytecodeCompiler compiler(cx, &cx->tempLifoAlloc(), options, srcBuf, staticScope, + BytecodeCompiler compiler(cx, alloc, options, srcBuf, staticScope, TraceLogger_ParserCompileModule); - return compiler.compileModule(); + RootedModuleObject module(cx, compiler.compileModule()); + if (!module) + return nullptr; + + // This happens in GlobalHelperThreadState::finishModuleParseTask() when a + // module is compiled off main thread. + if (cx->isJSContext() && !ModuleObject::FreezeArrayProperties(cx->asJSContext(), module)) + return nullptr; + + return module; } bool diff --git a/js/src/frontend/BytecodeCompiler.h b/js/src/frontend/BytecodeCompiler.h index 258149477f..dac0ce0df6 100644 --- a/js/src/frontend/BytecodeCompiler.h +++ b/js/src/frontend/BytecodeCompiler.h @@ -32,9 +32,9 @@ CompileScript(ExclusiveContext* cx, LifoAlloc* alloc, SourceCompressionTask* extraSct = nullptr, ScriptSourceObject** sourceObjectOut = nullptr); -ModuleObject * -CompileModule(JSContext *cx, HandleObject obj, const ReadOnlyCompileOptions &options, - SourceBufferHolder &srcBuf); +ModuleObject* +CompileModule(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, + SourceBufferHolder &srcBuf, LifoAlloc* alloc = nullptr); bool CompileLazyFunction(JSContext* cx, Handle lazy, const char16_t* chars, size_t length); diff --git a/js/src/gc/Zone.h b/js/src/gc/Zone.h index 8b7b1873cd..efeda7604a 100644 --- a/js/src/gc/Zone.h +++ b/js/src/gc/Zone.h @@ -128,7 +128,7 @@ struct Zone : public JS::shadow::Zone, { explicit Zone(JSRuntime* rt); ~Zone(); - bool init(bool isSystem); + MOZ_WARN_UNUSED_RESULT bool init(bool isSystem); void findOutgoingEdges(js::gc::ComponentFinder& finder); @@ -152,7 +152,8 @@ struct Zone : public JS::shadow::Zone, bool isTooMuchMalloc() const { return gcMallocBytes <= 0; } void onTooMuchMalloc(); - void* onOutOfMemory(js::AllocFunction allocFunc, size_t nbytes, void* reallocPtr = nullptr) { + MOZ_WARN_UNUSED_RESULT void* onOutOfMemory(js::AllocFunction allocFunc, size_t nbytes, + void* reallocPtr = nullptr) { if (!js::CurrentThreadCanAccessRuntime(runtime_)) return nullptr; return runtimeFromMainThread()->onOutOfMemory(allocFunc, nbytes, reallocPtr); @@ -360,7 +361,7 @@ struct Zone : public JS::shadow::Zone, mozilla::DebugOnly gcLastZoneGroupIndex; // Creates a HashNumber based on getUniqueId. Returns false on OOM. - bool getHashCode(js::gc::Cell* cell, js::HashNumber* hashp) { + MOZ_WARN_UNUSED_RESULT bool getHashCode(js::gc::Cell* cell, js::HashNumber* hashp) { uint64_t uid; if (!getUniqueId(cell, &uid)) return false; @@ -370,7 +371,7 @@ struct Zone : public JS::shadow::Zone, // Puts an existing UID in |uidp|, or creates a new UID for this Cell and // puts that into |uidp|. Returns false on OOM. - bool getUniqueId(js::gc::Cell* cell, uint64_t* uidp) { + MOZ_WARN_UNUSED_RESULT bool getUniqueId(js::gc::Cell* cell, uint64_t* uidp) { MOZ_ASSERT(uidp); MOZ_ASSERT(js::CurrentThreadCanAccessZone(this)); @@ -396,7 +397,7 @@ struct Zone : public JS::shadow::Zone, } // Return true if this cell has a UID associated with it. - bool hasUniqueId(js::gc::Cell* cell) { + MOZ_WARN_UNUSED_RESULT bool hasUniqueId(js::gc::Cell* cell) { MOZ_ASSERT(js::CurrentThreadCanAccessZone(this)); return uniqueIds_.has(cell); } diff --git a/js/src/jit-test/tests/collections/constructor-errors.js b/js/src/jit-test/tests/collections/constructor-errors.js index d5cbab9e79..6a145180f1 100644 --- a/js/src/jit-test/tests/collections/constructor-errors.js +++ b/js/src/jit-test/tests/collections/constructor-errors.js @@ -2,12 +2,9 @@ load(libdir + "asserts.js"); -function argsobj() { return arguments; } - var misc = [ {}, {x: 1}, Math, isNaN, Object.create(null), - argsobj(0, 1, 2), true, 0, 3.1416, new Boolean(true), new Number(0), {iterator: function () { return undefined; }}, @@ -19,4 +16,4 @@ var misc = [ for (var v of misc) { assertThrowsInstanceOf(function () { new Set(v); }, TypeError); assertThrowsInstanceOf(function () { new Map(v); }, TypeError); -} \ No newline at end of file +} diff --git a/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js b/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js new file mode 100644 index 0000000000..9653805c48 --- /dev/null +++ b/js/src/jit-test/tests/debug/Debugger-isCompilableUnit.js @@ -0,0 +1,58 @@ +load(libdir + "asserts.js"); + +const bad_types = [ + 2112, + {geddy: "lee"}, + () => 1, + [], + Array +] + +// We only accept strings around here! +for (var badType of bad_types) { + assertThrowsInstanceOf(() => { + Debugger.isCompilableUnit(badType); + }, TypeError); +} + +const compilable_units = [ + "wubba-lubba-dub-dub", + "'Get Schwifty!'", + "1 + 2", + "function f(x) {}", + "function x(f,) {", // statements with bad syntax are always compilable + "let x = 100", + ";;;;;;;;", + "", + " ", + "\n", + "let x", +] + +const non_compilable_units = [ + "function f(x) {", + "(...d) =>", + "{geddy:", + "{", + "[1, 2", + "[", + "1 +", + "let x =", + "3 ==", +] + +for (var code of compilable_units) { + assertEq(Debugger.isCompilableUnit(code), true); +} + +for (var code of non_compilable_units) { + assertEq(Debugger.isCompilableUnit(code), false); +} + +// Supplying no arguments should throw a type error +assertThrowsInstanceOf(() => { + Debugger.isCompilableUnit(); +}, TypeError); + +// Supplying extra arguments should be fine +assertEq(Debugger.isCompilableUnit("", 1, 2, 3, 4, {}, []), true); diff --git a/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js b/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js new file mode 100644 index 0000000000..633c8176b7 --- /dev/null +++ b/js/src/jit-test/tests/debug/Script-getOffsetsCoverage-bug1233178.js @@ -0,0 +1,13 @@ +gczeal(2); +g = newGlobal(); +dbg = Debugger(g); +function loop() { + for (var i = 0; i < 10; i++) + debugger; +} +g.eval(loop.toSource()); +dbg.onDebuggerStatement = function(f) { + f.script.getOffsetsCoverage(); +} +dbg.collectCoverageInfo = true; +g.eval("loop")(); diff --git a/js/src/jit-test/tests/debug/bug-1238610.js b/js/src/jit-test/tests/debug/bug-1238610.js new file mode 100644 index 0000000000..369d7550d0 --- /dev/null +++ b/js/src/jit-test/tests/debug/bug-1238610.js @@ -0,0 +1,27 @@ +// |jit-test| allow-oom + +if (!('oomAfterAllocations' in this)) + quit(); + +if (helperThreadCount() === 0) + quit(0); + +lfcode = new Array(); +dbg = Debugger(); +dbg.onEnterFrame = function() {}; +g = newGlobal(); +lfcode.push(` + oomAfterAllocations(100); + new Number(); + dbg.addDebuggee(g); +`) +file = lfcode.shift(); +loadFile(file); +function loadFile(lfVarx) { + lfGlobal = newGlobal() + for (lfLocal in this) + if (!(lfLocal in lfGlobal)) + lfGlobal[lfLocal] = this[lfLocal] + offThreadCompileScript(lfVarx) + lfGlobal.runOffThreadScript() +} diff --git a/js/src/jit-test/tests/debug/bug1240546.js b/js/src/jit-test/tests/debug/bug1240546.js new file mode 100644 index 0000000000..58efe086e2 --- /dev/null +++ b/js/src/jit-test/tests/debug/bug1240546.js @@ -0,0 +1,12 @@ +// |jit-test| allow-oom + +if (!('oomAfterAllocations' in this)) + quit(); + +var g = newGlobal(); +g.debuggeeGlobal = this; +g.eval("(" + function() { + oomAfterAllocations(100); + var dbg = Debugger(debuggeeGlobal); + dbg.onEnterFrame = function(frame) {} +} + ")()"); diff --git a/js/src/jit-test/tests/debug/bug1240803.js b/js/src/jit-test/tests/debug/bug1240803.js new file mode 100644 index 0000000000..84f6d17e59 --- /dev/null +++ b/js/src/jit-test/tests/debug/bug1240803.js @@ -0,0 +1,24 @@ +// |jit-test| allow-oom + + +if (!('oomAfterAllocations' in this)) + quit(); + +(function() { + g = newGlobal() + dbg = new Debugger + g.toggle = function(d) { + if (d) { + dbg.addDebuggee(g); + dbg.getNewestFrame(); + oomAfterAllocations(2); + setBreakpoint; + } + } + g.eval("" + function f(d) toggle(d)) + g.eval("(" + function() { + f(false); + f(true); + } + ")()") +})(); + diff --git a/js/src/jit-test/tests/debug/bug1242111.js b/js/src/jit-test/tests/debug/bug1242111.js new file mode 100644 index 0000000000..4b365ad5f5 --- /dev/null +++ b/js/src/jit-test/tests/debug/bug1242111.js @@ -0,0 +1,11 @@ +// |jit-test| allow-oom + +if (!('oomAfterAllocations' in this)) + quit(); + +var g = newGlobal(); +g.debuggeeGlobal = []; +g.eval("(" + function() { + oomAfterAllocations(57); + Debugger(debuggeeGlobal).onEnterFrame = function() {} +} + ")()"); diff --git a/js/src/jit-test/tests/for-of/non-iterable.js b/js/src/jit-test/tests/for-of/non-iterable.js index 319e622ef1..99f6b07802 100644 --- a/js/src/jit-test/tests/for-of/non-iterable.js +++ b/js/src/jit-test/tests/for-of/non-iterable.js @@ -2,12 +2,9 @@ load(libdir + "asserts.js"); -function argsobj() { return arguments; } - var misc = [ {}, {x: 1}, Math, isNaN, Object.create(null), - argsobj(0, 1, 2), null, undefined, true, 0, 3.1416, new Boolean(true), new Number(0), diff --git a/js/src/jit-test/tests/gc/bug-1240503.js b/js/src/jit-test/tests/gc/bug-1240503.js new file mode 100644 index 0000000000..0cc7cabe07 --- /dev/null +++ b/js/src/jit-test/tests/gc/bug-1240503.js @@ -0,0 +1,8 @@ +if (!('oomTest' in this)) + quit(); + +function arrayProtoOutOfRange() { + for (let [] = () => r, get;;) + var r = f(i % 2 ? a : b); +} +oomTest(arrayProtoOutOfRange); diff --git a/js/src/jit-test/tests/ion/bug832058.js b/js/src/jit-test/tests/ion/bug832058.js index e6877e5bf1..22dd37a4e0 100644 --- a/js/src/jit-test/tests/ion/bug832058.js +++ b/js/src/jit-test/tests/ion/bug832058.js @@ -1,4 +1,4 @@ -// |jit-test| error: TypeError +// |jit-test| function f(c) { var b = arguments; if (c == 1) @@ -9,6 +9,7 @@ function f(c) { evaluate("f('a', 'b', 'c', 'd', 'e');"); function test(){ var args = f('a', (0), 'c'); + var s; for (var v of args) s += v; } diff --git a/js/src/jit-test/tests/modules/global-scope.js b/js/src/jit-test/tests/modules/global-scope.js new file mode 100644 index 0000000000..90a9f7026b --- /dev/null +++ b/js/src/jit-test/tests/modules/global-scope.js @@ -0,0 +1,32 @@ +// Test interaction with global object and global lexical scope. + +function parseAndEvaluate(source) { + let m = parseModule(source); + m.declarationInstantiation(); + return m.evaluation(); +} + +var x = 1; +assertEq(parseAndEvaluate("let r = x; x = 2; r"), 1); +assertEq(x, 2); + +let y = 3; +assertEq(parseAndEvaluate("let r = y; y = 4; r"), 3); +assertEq(y, 4); + +if (helperThreadCount() == 0) + quit(); + +function offThreadParseAndEvaluate(source) { + offThreadCompileModule(source); + let m = finishOffThreadModule(); + print("compiled"); + m.declarationInstantiation(); + return m.evaluation(); +} + +assertEq(offThreadParseAndEvaluate("let r = x; x = 5; r"), 2); +assertEq(x, 5); + +assertEq(offThreadParseAndEvaluate("let r = y; y = 6; r"), 4); +assertEq(y, 6); diff --git a/js/src/jit-test/tests/modules/off-thread-compile.js b/js/src/jit-test/tests/modules/off-thread-compile.js new file mode 100644 index 0000000000..8d17d4a749 --- /dev/null +++ b/js/src/jit-test/tests/modules/off-thread-compile.js @@ -0,0 +1,16 @@ +// Test off thread module compilation. + +if (helperThreadCount() == 0) + quit(); + +load(libdir + "asserts.js"); +load(libdir + "dummyModuleResolveHook.js"); + +function offThreadParseAndEvaluate(source) { + offThreadCompileModule(source); + let m = finishOffThreadModule(); + m.declarationInstantiation(); + return m.evaluation(); +} + +offThreadParseAndEvaluate("export let x = 2 * 3;"); diff --git a/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js b/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js new file mode 100644 index 0000000000..2d111cc3a4 --- /dev/null +++ b/js/src/jit-test/tests/self-hosting/method-called-on-incompatible.js @@ -0,0 +1,9 @@ +load(libdir + "asserts.js"); + +assertTypeErrorMessage(() => WeakSet.prototype.add.call({}), "add method called on incompatible Object"); +assertTypeErrorMessage(() => newGlobal().WeakSet.prototype.add.call({}), "add method called on incompatible Object"); +assertTypeErrorMessage(() => WeakSet.prototype.add.call(15), "add method called on incompatible number"); + +assertTypeErrorMessage(() => Int8Array.prototype.find.call({}), "find method called on incompatible Object"); +assertTypeErrorMessage(() => newGlobal().Int8Array.prototype.find.call({}), "find method called on incompatible Object"); +assertTypeErrorMessage(() => Int8Array.prototype.find.call(15), "find method called on incompatible number"); diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp index 792c59b8f9..c8e0f3f76a 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -963,7 +963,8 @@ JS_TransplantObject(JSContext* cx, HandleObject origobj, HandleObject target) MOZ_ASSERT(Wrapper::wrappedObject(newIdentityWrapper) == newIdentity); if (!JSObject::swap(cx, origobj, newIdentityWrapper)) MOZ_CRASH(); - origobj->compartment()->putWrapper(cx, CrossCompartmentKey(newIdentity), origv); + if (!origobj->compartment()->putWrapper(cx, CrossCompartmentKey(newIdentity), origv)) + MOZ_CRASH(); } // The new identity object might be one of several things. Return it to avoid @@ -1056,15 +1057,6 @@ static const JSStdName builtin_property_names[] = { #if JS_HAS_UNEVAL { EAGER_ATOM(uneval), JSProto_String }, #endif -#ifdef ENABLE_SIMD - { EAGER_ATOM(SIMD), JSProto_SIMD }, -#endif -#ifdef ENABLE_BINARYDATA - { EAGER_ATOM(TypedObject), JSProto_TypedObject }, -#endif -#ifdef ENABLE_SHARED_ARRAY_BUFFER - { EAGER_ATOM(Atomics), JSProto_Atomics }, -#endif { 0, JSProto_LIMIT } }; @@ -1104,14 +1096,8 @@ JS_ResolveStandardClass(JSContext* cx, HandleObject obj, HandleId id, bool* reso if (!stdnm) stdnm = LookupStdName(cx->names(), idAtom, builtin_property_names); -#ifdef ENABLE_SHARED_ARRAY_BUFFER - if (stdnm && !cx->compartment()->creationOptions().getSharedMemoryAndAtomicsEnabled() && - (stdnm->atomOffset == NAME_OFFSET(Atomics) || - stdnm->atomOffset == NAME_OFFSET(SharedArrayBuffer))) - { + if (stdnm && GlobalObject::skipDeselectedConstructor(cx, stdnm->key)) stdnm = nullptr; - } -#endif // If this class is anonymous, then it doesn't exist as a global // property, so we won't resolve anything. @@ -1154,6 +1140,9 @@ JS_MayResolveStandardClass(const JSAtomState& names, jsid id, JSObject* maybeObj JSAtom* atom = JSID_TO_ATOM(id); + // This will return true even for deselected constructors. (To do + // better, we need a JSContext here; it's fine as it is.) + return atom == names.undefined || LookupStdName(names, atom, standard_class_names) || LookupStdName(names, atom, builtin_property_names); @@ -1210,6 +1199,9 @@ JS_IdToProtoKey(JSContext* cx, HandleId id) if (!stdnm) return JSProto_Null; + if (GlobalObject::skipDeselectedConstructor(cx, stdnm->key)) + return JSProto_Null; + MOZ_ASSERT(MOZ_ARRAY_LENGTH(standard_class_names) == JSProto_LIMIT + 1); return static_cast(stdnm - standard_class_names); } @@ -3091,7 +3083,7 @@ DefineSelfHostedProperty(JSContext* cx, HandleObject obj, HandleId id, return false; } MOZ_ASSERT(setterValue.isObject() && setterValue.toObject().is()); - setterFunc = &getterValue.toObject().as(); + setterFunc = &setterValue.toObject().as(); } JSNative setterOp = JS_DATA_TO_FUNC_PTR(JSNative, setterFunc.get()); @@ -4153,11 +4145,11 @@ JS::FinishOffThreadScript(JSContext* maybecx, JSRuntime* rt, void* token) RootedScript script(maybecx); { AutoLastFrameCheck lfc(maybecx); - script = HelperThreadState().finishParseTask(maybecx, rt, token); + script = HelperThreadState().finishScriptParseTask(maybecx, rt, token); } return script; } else { - return HelperThreadState().finishParseTask(maybecx, rt, token); + return HelperThreadState().finishScriptParseTask(maybecx, rt, token); } } diff --git a/js/src/jsarray.cpp b/js/src/jsarray.cpp index 56a82acda9..d1608997c0 100644 --- a/js/src/jsarray.cpp +++ b/js/src/jsarray.cpp @@ -2343,7 +2343,11 @@ CanOptimizeForDenseStorage(HandleObject arr, uint32_t startingIndex, uint32_t co * visited. See bug 690622. */ ObjectGroup* arrGroup = arr->getGroup(cx); - if (MOZ_UNLIKELY(!arrGroup || arrGroup->hasAllFlags(OBJECT_FLAG_ITERATED))) + if (!arrGroup) { + cx->recoverFromOutOfMemory(); + return false; + } + if (MOZ_UNLIKELY(arrGroup->hasAllFlags(OBJECT_FLAG_ITERATED))) return false; /* diff --git a/js/src/jscompartment.cpp b/js/src/jscompartment.cpp index 45edbc057b..6e5f2f3fa6 100644 --- a/js/src/jscompartment.cpp +++ b/js/src/jscompartment.cpp @@ -670,7 +670,7 @@ JSCompartment::sweepInnerViews() void JSCompartment::sweepSavedStacks() { - savedStacks_.sweep(runtimeFromAnyThread()); + savedStacks_.sweep(); } void @@ -1078,8 +1078,9 @@ JSCompartment::clearScriptCounts() // Clear all hasScriptCounts_ flags of JSScript, in order to release all // ScriptCounts entry of the current compartment. for (ScriptCountsMap::Range r = scriptCountsMap->all(); !r.empty(); r.popFront()) { - ScriptCounts* value = &r.front().value(); + ScriptCounts* value = r.front().value(); r.front().key()->takeOverScriptCountsMapEntry(value); + js_delete(value); } js_delete(scriptCountsMap); diff --git a/js/src/jscompartment.h b/js/src/jscompartment.h index e59b7779f0..f27586287c 100644 --- a/js/src/jscompartment.h +++ b/js/src/jscompartment.h @@ -485,7 +485,8 @@ struct JSCompartment DebuggerNeedsDelazification = 1 << 4 }; - unsigned debugModeBits; + unsigned debugModeBits; + friend class AutoRestoreCompartmentDebugMode; static const unsigned DebuggerObservesMask = IsDebuggee | DebuggerObservesAllExecution | @@ -498,17 +499,18 @@ struct JSCompartment JSCompartment(JS::Zone* zone, const JS::CompartmentOptions& options); ~JSCompartment(); - bool init(JSContext* maybecx); + MOZ_WARN_UNUSED_RESULT bool init(JSContext* maybecx); - inline bool wrap(JSContext* cx, JS::MutableHandleValue vp, - JS::HandleObject existing = nullptr); + MOZ_WARN_UNUSED_RESULT inline bool wrap(JSContext* cx, JS::MutableHandleValue vp, + JS::HandleObject existing = nullptr); - bool wrap(JSContext* cx, js::MutableHandleString strp); - bool wrap(JSContext* cx, JS::MutableHandleObject obj, - JS::HandleObject existingArg = nullptr); - bool wrap(JSContext* cx, JS::MutableHandle desc); + MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, js::MutableHandleString strp); + MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, JS::MutableHandleObject obj, + JS::HandleObject existingArg = nullptr); + MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, JS::MutableHandle desc); - template bool wrap(JSContext* cx, JS::AutoVectorRooter& vec) { + template MOZ_WARN_UNUSED_RESULT bool wrap(JSContext* cx, + JS::AutoVectorRooter& vec) { for (size_t i = 0; i < vec.length(); ++i) { if (!wrap(cx, vec[i])) return false; @@ -516,7 +518,8 @@ struct JSCompartment return true; }; - bool putWrapper(JSContext* cx, const js::CrossCompartmentKey& wrapped, const js::Value& wrapper); + MOZ_WARN_UNUSED_RESULT bool putWrapper(JSContext* cx, const js::CrossCompartmentKey& wrapped, + const js::Value& wrapper); js::WrapperMap::Ptr lookupWrapper(const js::Value& wrapped) const { return crossCompartmentWrappers.lookup(js::CrossCompartmentKey(wrapped)); diff --git a/js/src/jsobj.cpp b/js/src/jsobj.cpp index 76d51bae68..210ff1b76c 100644 --- a/js/src/jsobj.cpp +++ b/js/src/jsobj.cpp @@ -3757,7 +3757,7 @@ JSObject::addSizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf, JS::ClassIn if (is() && as().hasDynamicElements()) { js::ObjectElements* elements = as().getElementsHeader(); if (!elements->isCopyOnWrite() || elements->ownerObject() == this) - info->objectsMallocHeapElementsNonAsmJS += mallocSizeOf(elements); + info->objectsMallocHeapElementsNormal += mallocSizeOf(elements); } // Other things may be measured in the future if DMD indicates it is worthwhile. diff --git a/js/src/jsprf.cpp b/js/src/jsprf.cpp index 0d367de047..0cdd27ddfb 100644 --- a/js/src/jsprf.cpp +++ b/js/src/jsprf.cpp @@ -1021,7 +1021,7 @@ JS_vsnprintf(char* out, uint32_t outlen, const char* fmt, va_list ap) (void) dosprintf(&ss, fmt, ap); uint32_t charsWritten = ss.cur - ss.base; - MOZ_ASSERT(charsWritten > 0); + MOZ_RELEASE_ASSERT(charsWritten > 0); // If we didn't append a null then we must have hit the buffer limit. Write // a null terminator now and return a value indicating that we failed. diff --git a/js/src/jsscript.cpp b/js/src/jsscript.cpp index 2cd21a4464..98018f7477 100644 --- a/js/src/jsscript.cpp +++ b/js/src/jsscript.cpp @@ -14,6 +14,7 @@ #include "mozilla/MathAlgorithms.h" #include "mozilla/MemoryReporting.h" #include "mozilla/PodOperations.h" +#include "mozilla/ScopeExit.h" #include "mozilla/Vector.h" #include @@ -1343,20 +1344,26 @@ JSScript::initScriptCounts(JSContext* cx) jsbytecode* mainEntry = main(); for (jsbytecode* pc = code(); pc != end; pc = GetNextPc(pc)) { if (pc == mainEntry) { - if (!jumpTargets.append(pc)) + if (!jumpTargets.append(pc)) { + ReportOutOfMemory(cx); return false; + } } bool jump = IsJumpOpcode(JSOp(*pc)); if (jump) { jsbytecode* target = pc + GET_JUMP_OFFSET(pc); - if (!jumpTargets.append(target)) + if (!jumpTargets.append(target)) { + ReportOutOfMemory(cx); return false; + } if (BytecodeFallsThrough(JSOp(*pc))) { jsbytecode* fallthrough = GetNextPc(pc); - if (!jumpTargets.append(fallthrough)) + if (!jumpTargets.append(fallthrough)) { + ReportOutOfMemory(cx); return false; + } } } @@ -1365,8 +1372,10 @@ JSScript::initScriptCounts(JSContext* cx) int32_t len = GET_JUMP_OFFSET(pc2); // Default target. - if (!jumpTargets.append(pc + len)) + if (!jumpTargets.append(pc + len)) { + ReportOutOfMemory(cx); return false; + } pc2 += JUMP_OFFSET_LEN; int32_t low = GET_JUMP_OFFSET(pc2); @@ -1378,8 +1387,10 @@ JSScript::initScriptCounts(JSContext* cx) int32_t off = (int32_t) GET_JUMP_OFFSET(pc2); if (off) { // Case (i + low) - if (!jumpTargets.append(pc + off)) + if (!jumpTargets.append(pc + off)) { + ReportOutOfMemory(cx); return false; + } } } } @@ -1396,8 +1407,10 @@ JSScript::initScriptCounts(JSContext* cx) continue; jsbytecode* tryTarget = tryStart + tn->length; - if (!jumpTargets.append(tryTarget)) + if (!jumpTargets.append(tryTarget)) { + ReportOutOfMemory(cx); return false; + } } } @@ -1408,8 +1421,10 @@ JSScript::initScriptCounts(JSContext* cx) // Initialize all PCCounts counters to 0. ScriptCounts::PCCountsVector base; - if (!base.reserve(jumpTargets.length())) + if (!base.reserve(jumpTargets.length())) { + ReportOutOfMemory(cx); return false; + } for (size_t i = 0; i < jumpTargets.length(); i++) base.infallibleEmplaceBack(pcToOffset(jumpTargets[i])); @@ -1418,8 +1433,10 @@ JSScript::initScriptCounts(JSContext* cx) ScriptCountsMap* map = compartment()->scriptCountsMap; if (!map) { map = cx->new_(); - if (!map) + if (!map) { + ReportOutOfMemory(cx); return false; + } if (!map->init()) { js_delete(map); @@ -1430,12 +1447,25 @@ JSScript::initScriptCounts(JSContext* cx) compartment()->scriptCountsMap = map; } - // Register the current ScriptCount in the compartment's map. - if (!map->putNew(this, Move(base))) + // Allocate the ScriptCounts. + ScriptCounts* sc = cx->new_(Move(base)); + if (!sc) { + ReportOutOfMemory(cx); return false; + } + auto guardScriptCounts = mozilla::MakeScopeExit([&] () { + js_delete(sc); + }); + + // Register the current ScriptCounts in the compartment's map. + if (!map->putNew(this, sc)) { + ReportOutOfMemory(cx); + return false; + } // safe to set this; we can't fail after this point. hasScriptCounts_ = true; + guardScriptCounts.release(); // Enable interrupts in any interpreter frames running on this script. This // is used to let the interpreter increment the PCCounts, if present. @@ -1460,7 +1490,7 @@ ScriptCounts& JSScript::getScriptCounts() { ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this); - return p->value(); + return *p->value(); } js::PCCounts* @@ -1620,7 +1650,7 @@ JSScript::takeOverScriptCountsMapEntry(ScriptCounts* entryValue) { #ifdef DEBUG ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this); - MOZ_ASSERT(entryValue == &p->value()); + MOZ_ASSERT(entryValue == p->value()); #endif hasScriptCounts_ = false; } @@ -1629,7 +1659,8 @@ void JSScript::releaseScriptCounts(ScriptCounts* counts) { ScriptCountsMap::Ptr p = GetScriptCountsMapEntry(this); - *counts = Move(p->value()); + *counts = Move(*p->value()); + js_delete(p->value()); compartment()->scriptCountsMap->remove(p); hasScriptCounts_ = false; } @@ -3217,21 +3248,17 @@ js::PCToLineNumber(unsigned startLine, jssrcnote* notes, jsbytecode* code, jsbyt ptrdiff_t target = pc - code; for (jssrcnote* sn = notes; !SN_IS_TERMINATOR(sn); sn = SN_NEXT(sn)) { offset += SN_DELTA(sn); - SrcNoteType type = (SrcNoteType) SN_TYPE(sn); - if (type == SRC_SETLINE) { - if (offset <= target) - lineno = unsigned(GetSrcNoteOffset(sn, 0)); - column = 0; - } else if (type == SRC_NEWLINE) { - if (offset <= target) - lineno++; - column = 0; - } - if (offset > target) break; - if (type == SRC_COLSPAN) { + SrcNoteType type = (SrcNoteType) SN_TYPE(sn); + if (type == SRC_SETLINE) { + lineno = unsigned(GetSrcNoteOffset(sn, 0)); + column = 0; + } else if (type == SRC_NEWLINE) { + lineno++; + column = 0; + } else if (type == SRC_COLSPAN) { ptrdiff_t colspan = SN_OFFSET_TO_COLSPAN(GetSrcNoteOffset(sn, 0)); MOZ_ASSERT(ptrdiff_t(column) + colspan >= 0); column += colspan; diff --git a/js/src/jsscript.h b/js/src/jsscript.h index d9738a35bc..a4fe4c2895 100644 --- a/js/src/jsscript.h +++ b/js/src/jsscript.h @@ -520,7 +520,7 @@ class ScriptCounts // the calls to the JSScript::finalize function which are used to aggregate code // coverage results on the compartment. typedef HashMap, SystemAllocPolicy> ScriptCountsMap; @@ -1052,11 +1052,10 @@ class JSScript : public js::gc::TenuredCell uint32_t sourceStart_; uint32_t sourceEnd_; - uint32_t warmUpCount; /* Number of times the script has been called - * or has had backedges taken. When running in - * ion, also increased for any inlined scripts. - * Reset if the script's JIT code is forcibly - * discarded. */ + // Number of times the script has been called or has had backedges taken. + // When running in ion, also increased for any inlined scripts. Reset if + // the script's JIT code is forcibly discarded. + mozilla::Atomic warmUpCount; // 16-bit fields. @@ -1719,7 +1718,7 @@ class JSScript : public js::gc::TenuredCell public: uint32_t getWarmUpCount() const { return warmUpCount; } uint32_t incWarmUpCounter(uint32_t amount = 1) { return warmUpCount += amount; } - uint32_t* addressOfWarmUpCounter() { return &warmUpCount; } + uint32_t* addressOfWarmUpCounter() { return reinterpret_cast(&warmUpCount); } static size_t offsetOfWarmUpCounter() { return offsetof(JSScript, warmUpCount); } void resetWarmUpCounter() { incWarmUpResetCounter(); warmUpCount = 0; } @@ -2143,8 +2142,8 @@ class LazyScript : public gc::TenuredCell // Function or block chain in which the script is nested, or nullptr. HeapPtrObject enclosingScope_; - // ScriptSourceObject, or nullptr if the script in which this is nested - // has not been compiled yet. This is never a CCW; we don't clone + // ScriptSourceObject. We leave this set to nullptr until we generate + // bytecode for our immediate parent. This is never a CCW; we don't clone // LazyScripts into other compartments. HeapPtrObject sourceObject_; diff --git a/js/src/jsversion.h b/js/src/jsversion.h index ec04292b98..81294ab24f 100644 --- a/js/src/jsversion.h +++ b/js/src/jsversion.h @@ -37,9 +37,8 @@ */ #define JS_OLD_GETTER_SETTER_METHODS 1 -/* Support for ES6 Classes. */ #ifdef NIGHTLY_BUILD -#define JS_HAS_CLASSES 1 -#endif + +#endif // NIGHTLY_BUILD #endif /* jsversion_h */ diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index a63500535a..c4a050a576 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -3332,7 +3332,6 @@ ParseModule(JSContext* cx, unsigned argc, Value* vp) return false; } - RootedObject global(cx, JS::CurrentGlobalOrNull(cx)); JSFlatString* scriptContents = args[0].toString()->ensureFlat(cx); if (!scriptContents) return false; @@ -3364,7 +3363,7 @@ ParseModule(JSContext* cx, unsigned argc, Value* vp) SourceBufferHolder srcBuf(chars, scriptContents->length(), SourceBufferHolder::NoOwnership); - RootedObject module(cx, frontend::CompileModule(cx, global, options, srcBuf)); + RootedObject module(cx, frontend::CompileModule(cx, options, srcBuf)); if (!module) return false; @@ -3698,6 +3697,109 @@ runOffThreadScript(JSContext* cx, unsigned argc, Value* vp) return JS_ExecuteScript(cx, script, args.rval()); } +static bool +CompileOffThreadModule(JSContext* cx, const ReadOnlyCompileOptions& options, + const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData) +{ + MOZ_ASSERT(JS::CanCompileOffThread(cx, options, length)); + return StartOffThreadParseModule(cx, options, chars, length, callback, callbackData); +} + +static JSObject* +FinishOffThreadModule(JSContext* maybecx, JSRuntime* rt, void* token) +{ + MOZ_ASSERT(CurrentThreadCanAccessRuntime(rt)); + return HelperThreadState().finishModuleParseTask(maybecx, rt, token); +} + +static bool +OffThreadCompileModule(JSContext* cx, unsigned argc, Value* vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + + if (args.length() != 1 || !args[0].isString()) { + JS_ReportErrorNumber(cx, my_GetErrorMessage, nullptr, JSSMSG_INVALID_ARGS, + "offThreadCompileModule"); + return false; + } + + JSAutoByteString fileNameBytes; + CompileOptions options(cx); + options.setIntroductionType("js shell offThreadCompileModule") + .setFileAndLine("", 1); + options.setIsRunOnce(true) + .setSourceIsLazy(false); + options.forceAsync = true; + + JSString* scriptContents = args[0].toString(); + AutoStableStringChars stableChars(cx); + if (!stableChars.initTwoByte(cx, scriptContents)) + return false; + + size_t length = scriptContents->length(); + const char16_t* chars = stableChars.twoByteRange().start().get(); + + // Make sure we own the string's chars, so that they are not freed before + // the compilation is finished. + ScopedJSFreePtr ownedChars; + if (stableChars.maybeGiveOwnershipToCaller()) { + ownedChars = const_cast(chars); + } else { + char16_t* copy = cx->pod_malloc(length); + if (!copy) + return false; + + mozilla::PodCopy(copy, chars, length); + ownedChars = copy; + chars = copy; + } + + if (!JS::CanCompileOffThread(cx, options, length)) { + JS_ReportError(cx, "cannot compile code on worker thread"); + return false; + } + + if (!offThreadState.startIfIdle(cx, ownedChars)) { + JS_ReportError(cx, "called offThreadCompileModule without receiving prior off-thread " + "compilation"); + return false; + } + + if (!CompileOffThreadModule(cx, options, chars, length, + OffThreadCompileScriptCallback, nullptr)) + { + offThreadState.abandon(cx); + return false; + } + + args.rval().setUndefined(); + return true; +} + +static bool +FinishOffThreadModule(JSContext* cx, unsigned argc, Value* vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + + JSRuntime* rt = cx->runtime(); + if (OffThreadParsingMustWaitForGC(rt)) + gc::AutoFinishGC finishgc(rt); + + void* token = offThreadState.waitUntilDone(cx); + if (!token) { + JS_ReportError(cx, "called finishOffThreadModule when no compilation is pending"); + return false; + } + + RootedObject module(cx, FinishOffThreadModule(cx, rt, token)); + if (!module) + return false; + + args.rval().setObject(*module); + return true; +} + struct MOZ_RAII FreeOnReturn { JSContext* cx; @@ -5057,6 +5159,16 @@ static const JSFunctionSpecWithHelp shell_functions[] = { " throw the appropriate exception; otherwise, run the script and return\n" " its value."), + JS_FN_HELP("offThreadCompileModule", OffThreadCompileModule, 1, 0, +"offThreadCompileModule(code)", +" Compile |code| on a helper thread. To wait for the compilation to finish\n" +" and get the module object, call |finishOffThreadModule|."), + + JS_FN_HELP("finishOffThreadModule", FinishOffThreadModule, 0, 0, +"finishOffThreadModule()", +" Wait for off-thread compilation to complete. If an error occurred,\n" +" throw the appropriate exception; otherwise, return the module object"), + JS_FN_HELP("timeout", Timeout, 1, 0, "timeout([seconds], [func])", " Get/Set the limit in seconds for the execution time for the current context.\n" diff --git a/js/src/tests/ecma_6/Array/iterator_edge_cases.js b/js/src/tests/ecma_6/Array/iterator_edge_cases.js index a52d1d40e1..079245ebfb 100644 --- a/js/src/tests/ecma_6/Array/iterator_edge_cases.js +++ b/js/src/tests/ecma_6/Array/iterator_edge_cases.js @@ -7,7 +7,7 @@ function TestArrayIteratorPrototypeConfusion() { throw new Error("Call did not throw"); } catch (e) { assertEq(e instanceof TypeError, true); - assertEq(e.message, "CallArrayIteratorMethodIfWrapped method called on incompatible Array Iterator"); + assertEq(e.message, "next method called on incompatible Array Iterator"); } } TestArrayIteratorPrototypeConfusion(); diff --git a/js/src/tests/ecma_6/Array/sort_holes.js b/js/src/tests/ecma_6/Array/sort_holes.js new file mode 100644 index 0000000000..332fbce37b --- /dev/null +++ b/js/src/tests/ecma_6/Array/sort_holes.js @@ -0,0 +1,72 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/licenses/publicdomain/ */ + +// We should preserve holes when sorting sparce arrays. +// See bug: 1246860 + +function denseCount(arr) { + var c = 0; + for (var i = 0; i < arr.length; i++) + if (i in arr) + c++; + return c; +} + +let a = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}]; +let b = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}].sort(); +let c = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}].sort((a, b) => {+a.size - +b.size}); + +assertEq(a.length, 22); +assertEq(denseCount(a), 2); +assertEq(a.length, b.length); +assertEq(b.length, c.length); +assertEq(denseCount(a), denseCount(b)); +assertEq(denseCount(b), denseCount(c)); + +let superSparce = new Array(5000); +superSparce[0] = 99; +superSparce[4000] = 0; +superSparce[4999] = -1; + +assertEq(superSparce.length, 5000); +assertEq(denseCount(superSparce), 3); + +superSparce.sort((a, b) => 1*a-b); +assertEq(superSparce.length, 5000); +assertEq(denseCount(superSparce), 3); +assertEq(superSparce[0], -1); +assertEq(superSparce[1], 0); +assertEq(superSparce[2], 99); + +let allHoles = new Array(2600); +assertEq(allHoles.length, 2600); +assertEq(denseCount(allHoles), 0); +allHoles.sort((a, b) => 1*a-b); +assertEq(allHoles.length, 2600); +assertEq(denseCount(allHoles), 0); + +let oneHole = new Array(2600); +oneHole[1399] = {size: 27}; +assertEq(oneHole.length, 2600); +assertEq(denseCount(oneHole), 1); +oneHole.sort((a, b) => {+a.size - +b.size}); +assertDeepEq(oneHole[0], {size: 27}); +assertEq(oneHole.length, 2600); +assertEq(denseCount(oneHole), 1); + +// Ensure that the array setter is touch touched during sorting. + +Object.defineProperty(Array.prototype, "0", { + set: (value) => {throw "Illegally touched the array's setter!"}, + configurable: true +}); + +assertThrows(() => {o[1] = 11;}); + +let o = [,,,,,,,,,,,,,,,,,,,,{size: 1},{size: 2}]; +o.sort((a, b) => {+a.size - +b.size}); + +delete Array.prototype["0"]; + +if (typeof reportCompare === 'function') + reportCompare(0, 0); diff --git a/js/src/tests/ecma_6/Function/arguments-iterator.js b/js/src/tests/ecma_6/Function/arguments-iterator.js new file mode 100644 index 0000000000..5610b8a245 --- /dev/null +++ b/js/src/tests/ecma_6/Function/arguments-iterator.js @@ -0,0 +1,167 @@ +var BUGNUMBER = 992617; +var summary = "Implement arguments[@@iterator]."; + +print(BUGNUMBER + ": " + summary); + +// MappedArgumentsObject +let mapped = [ + function(a, b, c) { + assertEq(Symbol.iterator in arguments, true); + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function(a, b, c) { + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function(a, b, c) { + arguments[Symbol.iterator] = 10; + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function(a, b, c) { + Object.defineProperty(arguments, Symbol.iterator, { + value: 10, writable: true, enumerable: true, configurable: true + }); + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function(a, b, c) { + assertEq(arguments[Symbol.iterator], Array.prototype[Symbol.iterator]); + }, + function(a, b, c) { + assertEq(arguments[Symbol.iterator].name, "values"); + }, + function(a, b, c) { + var desc = Object.getOwnPropertyDescriptor(arguments, Symbol.iterator); + assertEq("value" in desc, true); + assertEq(desc.value, Array.prototype[Symbol.iterator]); + assertEq(desc.writable, true); + assertEq(desc.enumerable, false); + assertEq(desc.configurable, true); + }, + function(a, b, c) { + var iter = arguments[Symbol.iterator](); + assertDeepEq(iter.next(), { value: 10, done: false }); + assertDeepEq(iter.next(), { value: 20, done: false }); + assertDeepEq(iter.next(), { value: 30, done: false }); + assertDeepEq(iter.next(), { value: undefined, done: true }); + }, + function(a, b, c) { + assertDeepEq([...arguments], [10, 20, 30]); + }, + function(a, b, c) { + b = 40; + assertDeepEq([...arguments], [10, 40, 30]); + }, + function(a, b, c) { + arguments.length = 4; + assertDeepEq([...arguments], [10, 20, 30, undefined]); + }, + function(a, b, c) { + arguments[5] = 50; + assertDeepEq([...arguments], [10, 20, 30]); + }, + function(a, b, c) { + arguments[Symbol.iterator] = function*() { + yield 40; + yield 50; + yield 60; + }; + assertDeepEq([...arguments], [40, 50, 60]); + }, +]; +for (let f of mapped) { + f(10, 20, 30); +} + +var g1 = newGlobal(); +assertEq(g1.eval(` +function f(a, b, c) { + return arguments[Symbol.iterator].name; +} +f(1, 2, 3); +`), "values"); + +// UnmappedArgumentsObject +let unmapped = [ + function([a], b, c) { + assertEq(Symbol.iterator in arguments, true); + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function([a], b, c) { + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function([a], b, c) { + arguments[Symbol.iterator] = 10; + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function([a], b, c) { + Object.defineProperty(arguments, Symbol.iterator, { + value: 10, writable: true, enumerable: true, configurable: true + }); + delete arguments[Symbol.iterator]; + assertEq(Symbol.iterator in arguments, false); + }, + function([a], b, c) { + assertEq(arguments[Symbol.iterator], Array.prototype[Symbol.iterator]); + }, + function([a], b, c) { + assertEq(arguments[Symbol.iterator].name, "values"); + }, + function([a], b, c) { + var desc = Object.getOwnPropertyDescriptor(arguments, Symbol.iterator); + assertEq("value" in desc, true); + assertEq(desc.value, Array.prototype[Symbol.iterator]); + assertEq(desc.writable, true); + assertEq(desc.enumerable, false); + assertEq(desc.configurable, true); + }, + function([a], b, c) { + var iter = arguments[Symbol.iterator](); + assertDeepEq(iter.next(), { value: [10], done: false }); + assertDeepEq(iter.next(), { value: 20, done: false }); + assertDeepEq(iter.next(), { value: 30, done: false }); + assertDeepEq(iter.next(), { value: undefined, done: true }); + }, + function([a], b, c) { + assertDeepEq([...arguments], [[10], 20, 30]); + }, + function([a], b, c) { + b = 40; + assertDeepEq([...arguments], [[10], 20, 30]); + }, + function([a], b, c) { + arguments.length = 4; + assertDeepEq([...arguments], [[10], 20, 30, undefined]); + }, + function([a], b, c) { + arguments[5] = 50; + assertDeepEq([...arguments], [[10], 20, 30]); + }, + function([a], b, c) { + arguments[Symbol.iterator] = function*() { + yield 40; + yield 50; + yield 60; + }; + assertDeepEq([...arguments], [40, 50, 60]); + }, +]; +for (let f of unmapped) { + f([10], 20, 30); +} + +var g2 = newGlobal(); +assertEq(g2.eval(` +function f([a], b, c) { + return arguments[Symbol.iterator].name; +} +f([1], 2, 3); +`), "values"); + +if (typeof reportCompare === "function") + reportCompare(true, true); diff --git a/js/src/tests/ecma_6/String/iterator_edge_cases.js b/js/src/tests/ecma_6/String/iterator_edge_cases.js index 2cba856909..15fe6d0fb1 100644 --- a/js/src/tests/ecma_6/String/iterator_edge_cases.js +++ b/js/src/tests/ecma_6/String/iterator_edge_cases.js @@ -7,7 +7,7 @@ function TestStringIteratorPrototypeConfusion() { throw new Error("Call did not throw"); } catch (e) { assertEq(e instanceof TypeError, true); - assertEq(e.message, "CallStringIteratorMethodIfWrapped method called on incompatible String Iterator"); + assertEq(e.message, "next method called on incompatible String Iterator"); } } TestStringIteratorPrototypeConfusion(); diff --git a/js/src/vm/ArgumentsObject.cpp b/js/src/vm/ArgumentsObject.cpp index 25becc6a5f..e409dfcb12 100644 --- a/js/src/vm/ArgumentsObject.cpp +++ b/js/src/vm/ArgumentsObject.cpp @@ -328,6 +328,8 @@ ArgumentsObject::obj_delProperty(JSContext* cx, HandleObject obj, HandleId id, argsobj.markLengthOverridden(); } else if (JSID_IS_ATOM(id, cx->names().callee)) { argsobj.as().clearCallee(); + } else if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) { + argsobj.markIteratorOverridden(); } return result.succeed(); } @@ -398,11 +400,34 @@ MappedArgSetter(JSContext* cx, HandleObject obj, HandleId id, MutableHandleValue NativeDefineProperty(cx, argsobj, id, vp, nullptr, nullptr, attrs, result); } +static bool +DefineArgumentsIterator(JSContext* cx, Handle argsobj) +{ + RootedId iteratorId(cx, SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator)); + HandlePropertyName shName = cx->names().ArrayValues; + RootedAtom name(cx, cx->names().values); + RootedValue val(cx); + if (!GlobalObject::getSelfHostedFunction(cx, cx->global(), shName, name, 0, &val)) + return false; + + return NativeDefineProperty(cx, argsobj, iteratorId, val, nullptr, nullptr, JSPROP_RESOLVING); +} + /* static */ bool MappedArgumentsObject::obj_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp) { Rooted argsobj(cx, &obj->as()); + if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) { + if (argsobj->hasOverriddenIterator()) + return true; + + if (!DefineArgumentsIterator(cx, argsobj)) + return false; + *resolvedp = true; + return true; + } + unsigned attrs = JSPROP_SHARED | JSPROP_SHADOWABLE | JSPROP_RESOLVING; if (JSID_IS_INT(id)) { uint32_t arg = uint32_t(JSID_TO_INT(id)); @@ -448,6 +473,10 @@ MappedArgumentsObject::obj_enumerate(JSContext* cx, HandleObject obj) if (!HasProperty(cx, argsobj, id, &found)) return false; + id = SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator); + if (!HasProperty(cx, argsobj, id, &found)) + return false; + for (unsigned i = 0; i < argsobj->initialLength(); i++) { id = INT_TO_JSID(i); if (!HasProperty(cx, argsobj, id, &found)) @@ -519,6 +548,16 @@ UnmappedArgumentsObject::obj_resolve(JSContext* cx, HandleObject obj, HandleId i { Rooted argsobj(cx, &obj->as()); + if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) { + if (argsobj->hasOverriddenIterator()) + return true; + + if (!DefineArgumentsIterator(cx, argsobj)) + return false; + *resolvedp = true; + return true; + } + unsigned attrs = JSPROP_SHARED | JSPROP_SHADOWABLE; GetterOp getter = UnmappedArgGetter; SetterOp setter = UnmappedArgSetter; @@ -570,6 +609,10 @@ UnmappedArgumentsObject::obj_enumerate(JSContext* cx, HandleObject obj) if (!HasProperty(cx, argsobj, id, &found)) return false; + id = SYMBOL_TO_JSID(cx->wellKnownSymbols().iterator); + if (!HasProperty(cx, argsobj, id, &found)) + return false; + for (unsigned i = 0; i < argsobj->initialLength(); i++) { id = INT_TO_JSID(i); if (!HasProperty(cx, argsobj, id, &found)) diff --git a/js/src/vm/ArgumentsObject.h b/js/src/vm/ArgumentsObject.h index 0eecff6d6c..636b7a79b7 100644 --- a/js/src/vm/ArgumentsObject.h +++ b/js/src/vm/ArgumentsObject.h @@ -109,8 +109,9 @@ static const unsigned ARGS_LENGTH_MAX = 500 * 1000; * * INITIAL_LENGTH_SLOT * Stores the initial value of arguments.length, plus a bit indicating - * whether arguments.length has been modified. Use initialLength() and - * hasOverriddenLength() to access these values. If arguments.length has + * whether arguments.length and/or arguments[@@iterator] have been + * modified. Use initialLength(), hasOverriddenLength(), and + * hasOverriddenIterator() to access these values. If arguments.length has * been modified, then the current value of arguments.length is stored in * another slot associated with a new property. * DATA_SLOT @@ -125,7 +126,8 @@ class ArgumentsObject : public NativeObject public: static const uint32_t LENGTH_OVERRIDDEN_BIT = 0x1; - static const uint32_t PACKED_BITS_COUNT = 1; + static const uint32_t ITERATOR_OVERRIDDEN_BIT = 0x2; + static const uint32_t PACKED_BITS_COUNT = 2; protected: template @@ -185,6 +187,18 @@ class ArgumentsObject : public NativeObject setFixedSlot(INITIAL_LENGTH_SLOT, Int32Value(v)); } + /* True iff arguments[@@iterator] has been assigned or its attributes + * changed. */ + bool hasOverriddenIterator() const { + const Value& v = getFixedSlot(INITIAL_LENGTH_SLOT); + return v.toInt32() & ITERATOR_OVERRIDDEN_BIT; + } + + void markIteratorOverridden() { + uint32_t v = getFixedSlot(INITIAL_LENGTH_SLOT).toInt32() | ITERATOR_OVERRIDDEN_BIT; + setFixedSlot(INITIAL_LENGTH_SLOT, Int32Value(v)); + } + /* * Because the arguments object is a real object, its elements may be * deleted. This is implemented by setting a 'deleted' flag for the arg diff --git a/js/src/vm/ArrayBufferObject.cpp b/js/src/vm/ArrayBufferObject.cpp index a82a855c0f..beedc9c385 100644 --- a/js/src/vm/ArrayBufferObject.cpp +++ b/js/src/vm/ArrayBufferObject.cpp @@ -576,16 +576,17 @@ ArrayBufferObject::setDataPointer(BufferContents contents, OwnsState ownsData) setFlags((flags() & ~KIND_MASK) | contents.kind()); } -size_t +uint32_t ArrayBufferObject::byteLength() const { - return size_t(getSlot(BYTE_LENGTH_SLOT).toDouble()); + return getSlot(BYTE_LENGTH_SLOT).toInt32(); } void -ArrayBufferObject::setByteLength(size_t length) +ArrayBufferObject::setByteLength(uint32_t length) { - setSlot(BYTE_LENGTH_SLOT, DoubleValue(length)); + MOZ_ASSERT(length <= INT32_MAX); + setSlot(BYTE_LENGTH_SLOT, Int32Value(length)); } uint32_t @@ -755,10 +756,10 @@ ArrayBufferObject::addSizeOfExcludingThis(JSObject* obj, mozilla::MallocSizeOf m switch (buffer.bufferKind()) { case PLAIN: - info->objectsMallocHeapElementsNonAsmJS += mallocSizeOf(buffer.dataPointer()); + info->objectsMallocHeapElementsNormal += mallocSizeOf(buffer.dataPointer()); break; case MAPPED: - info->objectsNonHeapElementsMapped += buffer.byteLength(); + info->objectsNonHeapElementsNormal += buffer.byteLength(); break; case WASM_MALLOCED: info->objectsMallocHeapElementsAsmJS += mallocSizeOf(buffer.dataPointer()); diff --git a/js/src/vm/ArrayBufferObject.h b/js/src/vm/ArrayBufferObject.h index 49cb887cdf..68f8a99db5 100644 --- a/js/src/vm/ArrayBufferObject.h +++ b/js/src/vm/ArrayBufferObject.h @@ -288,7 +288,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared public: uint8_t* dataPointer() const; SharedMem dataPointerShared() const; - size_t byteLength() const; + uint32_t byteLength() const; BufferContents contents() const { return BufferContents(dataPointer(), bufferKind()); } @@ -339,7 +339,7 @@ class ArrayBufferObject : public ArrayBufferObjectMaybeShared protected: void setDataPointer(BufferContents contents, OwnsState ownsState); - void setByteLength(size_t length); + void setByteLength(uint32_t length); uint32_t flags() const; void setFlags(uint32_t flags); diff --git a/js/src/vm/CallNonGenericMethod.cpp b/js/src/vm/CallNonGenericMethod.cpp index 157f348e21..131c447421 100644 --- a/js/src/vm/CallNonGenericMethod.cpp +++ b/js/src/vm/CallNonGenericMethod.cpp @@ -11,6 +11,7 @@ #include "proxy/Proxy.h" #include "vm/ProxyObject.h" +#include "vm/SelfHosting.h" using namespace js; @@ -27,6 +28,9 @@ JS::detail::CallMethodIfWrapped(JSContext* cx, IsAcceptableThis test, NativeImpl return Proxy::nativeCall(cx, test, impl, args); } + if (IsCallSelfHostedNonGenericMethod(impl)) + return ReportIncompatibleSelfHostedMethod(cx, args); + ReportIncompatible(cx, args); return false; } diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h index 4027ed06f3..8587a874cb 100644 --- a/js/src/vm/CommonPropertyNames.h +++ b/js/src/vm/CommonPropertyNames.h @@ -296,17 +296,15 @@ macro(iterator, iterator, "iterator") \ macro(match, match, "match") \ macro(species, species, "species") \ - macro(hasInstance, hasInstance, "hasInstance") \ macro(toPrimitive, toPrimitive, "toPrimitive") \ macro(unscopables, unscopables, "unscopables") \ /* Same goes for the descriptions of the well-known symbols. */ \ macro(Symbol_create, Symbol_create, "Symbol.create") \ - macro(Symbol_isConcatSpreadable, Symbol_isConcatSpreadable, "Symbol.isConcatSpreadable") \ - macro(Symbol_isRegExp, Symbol_isRegExp, "Symbol.isRegExp") \ - macro(Symbol_iterator, Symbol_iterator, "Symbol.iterator") \ - macro(Symbol_match, Symbol_match, "Symbol.match") \ - macro(Symbol_species, Symbol_species, "Symbol.species") \ macro(Symbol_hasInstance, Symbol_hasInstance, "Symbol.hasInstance") \ + macro(Symbol_isConcatSpreadable, Symbol_isConcatSpreadable, "Symbol.isConcatSpreadable") \ + macro(Symbol_iterator, Symbol_iterator, "Symbol.iterator") \ + macro(Symbol_match, Symbol_match, "Symbol.match") \ + macro(Symbol_species, Symbol_species, "Symbol.species") \ macro(Symbol_toPrimitive, Symbol_toPrimitive, "Symbol.toPrimitive") \ macro(Symbol_unscopables, Symbol_unscopables, "Symbol.unscopables") \ /* Function names for properties named by symbols. */ \ diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp index 7acf84f58d..ba457ea451 100644 --- a/js/src/vm/Debugger.cpp +++ b/js/src/vm/Debugger.cpp @@ -20,6 +20,7 @@ #include "jswrapper.h" #include "frontend/BytecodeCompiler.h" +#include "frontend/Parser.h" #include "gc/Marking.h" #include "jit/BaselineDebugModeOSR.h" #include "jit/BaselineJIT.h" @@ -248,6 +249,28 @@ class Debugger::FrameRange } }; +class AutoRestoreCompartmentDebugMode +{ + JSCompartment* comp_; + unsigned bits_; + + public: + explicit AutoRestoreCompartmentDebugMode(JSCompartment* comp) + : comp_(comp), bits_(comp->debugModeBits) + { + MOZ_ASSERT(comp_); + } + + ~AutoRestoreCompartmentDebugMode() { + if (comp_) + comp_->debugModeBits = bits_; + } + + void release() { + comp_ = nullptr; + } +}; + // Given a Debugger instance dbg, if it is enabled, prevents all its debuggee // compartments from executing scripts. Attempts to run script will throw an // instance of Debugger.DebuggeeWouldRun from the topmost locked Debugger's @@ -2295,27 +2318,6 @@ UpdateExecutionObservabilityOfScriptsInZone(JSContext* cx, Zone* zone, JSRuntime* rt = cx->runtime(); FreeOp* fop = cx->runtime()->defaultFreeOp(); - // Mark active baseline scripts in the observable set so that they don't - // get discarded. They will be recompiled. - for (JitActivationIterator actIter(rt); !actIter.done(); ++actIter) { - if (actIter->compartment()->zone() != zone) - continue; - - for (JitFrameIterator iter(actIter); !iter.done(); ++iter) { - switch (iter.type()) { - case JitFrame_BaselineJS: - MarkBaselineScriptActiveIfObservable(iter.script(), obs); - break; - case JitFrame_IonJS: - MarkBaselineScriptActiveIfObservable(iter.script(), obs); - for (InlineFrameIterator inlineIter(rt, &iter); inlineIter.more(); ++inlineIter) - MarkBaselineScriptActiveIfObservable(inlineIter.script(), obs); - break; - default:; - } - } - } - Vector scripts(cx); // Iterate through observable scripts, invalidating their Ion scripts and @@ -2340,6 +2342,30 @@ UpdateExecutionObservabilityOfScriptsInZone(JSContext* cx, Zone* zone, } } + // Code below this point must be infallible to ensure the active bit of + // BaselineScripts is in a consistent state. + // + // Mark active baseline scripts in the observable set so that they don't + // get discarded. They will be recompiled. + for (JitActivationIterator actIter(rt); !actIter.done(); ++actIter) { + if (actIter->compartment()->zone() != zone) + continue; + + for (JitFrameIterator iter(actIter); !iter.done(); ++iter) { + switch (iter.type()) { + case JitFrame_BaselineJS: + MarkBaselineScriptActiveIfObservable(iter.script(), obs); + break; + case JitFrame_IonJS: + MarkBaselineScriptActiveIfObservable(iter.script(), obs); + for (InlineFrameIterator inlineIter(rt, &iter); inlineIter.more(); ++inlineIter) + MarkBaselineScriptActiveIfObservable(inlineIter.script(), obs); + break; + default:; + } + } + } + // Iterate through the scripts again and finish discarding // BaselineScripts. This must be done as a separate phase as we can only // discard the BaselineScript on scripts that have no IonScript. @@ -2475,11 +2501,16 @@ Debugger::updateObservesAllExecutionOnDebuggees(JSContext* cx, IsObserving obser // so add the compartment to the set only if we are observing. if (observing && !obs.add(comp)) return false; - - comp->updateDebuggerObservesAllExecution(); } - return updateExecutionObservability(cx, obs, observing); + if (!updateExecutionObservability(cx, obs, observing)) + return false; + + typedef ExecutionObservableCompartments::CompartmentRange CompartmentRange; + for (CompartmentRange r = obs.compartments()->all(); !r.empty(); r.popFront()) + r.front()->updateDebuggerObservesAllExecution(); + + return true; } bool @@ -3039,10 +3070,14 @@ Debugger::setHookImpl(JSContext* cx, CallArgs& args, Debugger& dbg, Hook which) JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_NOT_CALLABLE_OR_UNDEFINED); return false; } - dbg.object->setReservedSlot(JSSLOT_DEBUG_HOOK_START + which, args[0]); + uint32_t slot = JSSLOT_DEBUG_HOOK_START + which; + RootedValue oldHook(cx, dbg.object->getReservedSlot(slot)); + dbg.object->setReservedSlot(slot, args[0]); if (hookObservesAllExecution(which)) { - if (!dbg.updateObservesAllExecutionOnDebuggees(cx, dbg.observesAllExecution())) + if (!dbg.updateObservesAllExecutionOnDebuggees(cx, dbg.observesAllExecution())) { + dbg.object->setReservedSlot(slot, oldHook); return false; + } } args.rval().setUndefined(); return true; @@ -3677,6 +3712,7 @@ Debugger::addDebuggeeGlobal(JSContext* cx, Handle global) }); // (6) + AutoRestoreCompartmentDebugMode debugModeGuard(debuggeeCompartment); debuggeeCompartment->setIsDebuggee(); debuggeeCompartment->updateDebuggerObservesAsmJS(); debuggeeCompartment->updateDebuggerObservesCoverage(); @@ -3688,6 +3724,7 @@ Debugger::addDebuggeeGlobal(JSContext* cx, Handle global) zoneDebuggersGuard.release(); debuggeeZonesGuard.release(); allocationsTrackingGuard.release(); + debugModeGuard.release(); return true; } @@ -4690,6 +4727,56 @@ Debugger::endTraceLogger(JSContext* cx, unsigned argc, Value* vp) return true; } +bool +Debugger::isCompilableUnit(JSContext* cx, unsigned argc, Value* vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + + if (!args.requireAtLeast(cx, "Debugger.isCompilableUnit", 1)) + return false; + + if (!args[0].isString()) { + JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, + JSMSG_NOT_EXPECTED_TYPE, "Debugger.isCompilableUnit", + "string", InformalValueTypeName(args[0])); + return false; + } + + JSString* str = args[0].toString(); + size_t length = GetStringLength(str); + + AutoStableStringChars chars(cx); + if (!chars.initTwoByte(cx, str)) + return false; + + bool result = true; + + CompileOptions options(cx); + frontend::Parser parser(cx, &cx->tempLifoAlloc(), + options, chars.twoByteChars(), + length, /* foldConstants = */ true, + nullptr, nullptr); + JSErrorReporter older = JS_SetErrorReporter(cx->runtime(), nullptr); + if (!parser.checkOptions() || !parser.parse()) { + // We ran into an error. If it was because we ran out of memory we report + // it in the usual way. + if (cx->isThrowingOutOfMemory()) { + JS_SetErrorReporter(cx->runtime(), older); + return false; + } + + // If it was because we ran out of source, we return false so our caller + // knows to try to collect more [source]. + if (parser.isUnexpectedEOF()) + result = false; + + cx->clearPendingException(); + } + JS_SetErrorReporter(cx->runtime(), older); + args.rval().setBoolean(result); + return true; +} + bool Debugger::drainTraceLoggerScriptCalls(JSContext* cx, unsigned argc, Value* vp) { @@ -4811,7 +4898,11 @@ const JSFunctionSpec Debugger::methods[] = { JS_FS_END }; - +const JSFunctionSpec Debugger::static_methods[] { + JS_FN("isCompilableUnit", Debugger::isCompilableUnit, 1, 0), + JS_FS_END +}; + /*** Debugger.Script *****************************************************************************/ static inline JSScript* @@ -5581,6 +5672,19 @@ Debugger::observesScript(JSScript* script) const Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePtr to, ScriptFrameIter& iter) { + auto removeFromDebuggerFramesOnExit = MakeScopeExit([&] { + // Remove any remaining old entries on exit, as the 'from' frame will + // be gone. On success, the range will be empty. + for (Debugger::FrameRange r(from); !r.empty(); r.popFront()) { + r.frontFrame()->setPrivate(nullptr); + r.removeFrontFrame(); + } + + // Rekey missingScopes to maintain Debugger.Environment identity and + // forward liveScopes to point to the new frame. + DebugScopes::forwardLiveFrame(cx, from, to); + }); + // Forward live Debugger.Frame objects. for (Debugger::FrameRange r(from); !r.empty(); r.popFront()) { RootedNativeObject frameobj(cx, r.frontFrame()); @@ -5594,7 +5698,7 @@ Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePt return false; frameobj->setPrivate(data); - // Remove the old entry before mutating the HashMap. + // Remove the old frame. r.removeFrontFrame(); // Add the frame object with |to| as key. @@ -5604,11 +5708,6 @@ Debugger::replaceFrameGuts(JSContext* cx, AbstractFramePtr from, AbstractFramePt } } - // Rekey missingScopes to maintain Debugger.Environment identity and - // forward liveScopes to point to the new frame, as the old frame will be - // gone. - DebugScopes::forwardLiveFrame(cx, from, to); - return true; } @@ -8589,8 +8688,8 @@ JS_DefineDebuggerObject(JSContext* cx, HandleObject obj) return false; debugProto = InitClass(cx, obj, objProto, &Debugger::jsclass, Debugger::construct, - 1, Debugger::properties, Debugger::methods, nullptr, nullptr, - debugCtor.address()); + 1, Debugger::properties, Debugger::methods, nullptr, + Debugger::static_methods, debugCtor.address()); if (!debugProto) return false; diff --git a/js/src/vm/Debugger.h b/js/src/vm/Debugger.h index d85c86dc2f..87c74626e0 100644 --- a/js/src/vm/Debugger.h +++ b/js/src/vm/Debugger.h @@ -585,6 +585,7 @@ class Debugger : private mozilla::LinkedListElement static bool drainTraceLoggerScriptCalls(JSContext* cx, unsigned argc, Value* vp); static bool startTraceLogger(JSContext* cx, unsigned argc, Value* vp); static bool endTraceLogger(JSContext* cx, unsigned argc, Value* vp); + static bool isCompilableUnit(JSContext* cx, unsigned argc, Value* vp); #ifdef NIGHTLY_BUILD static bool setupTraceLogger(JSContext* cx, unsigned argc, Value* vp); static bool drainTraceLogger(JSContext* cx, unsigned argc, Value* vp); @@ -592,6 +593,7 @@ class Debugger : private mozilla::LinkedListElement static bool construct(JSContext* cx, unsigned argc, Value* vp); static const JSPropertySpec properties[]; static const JSFunctionSpec methods[]; + static const JSFunctionSpec static_methods[]; static void removeFromFrameMapsAndClearBreakpointsIn(JSContext* cx, AbstractFramePtr frame); static bool updateExecutionObservabilityOfFrames(JSContext* cx, const ExecutionObservableSet& obs, diff --git a/js/src/vm/GlobalObject.cpp b/js/src/vm/GlobalObject.cpp index e24584be05..554f15fe98 100644 --- a/js/src/vm/GlobalObject.cpp +++ b/js/src/vm/GlobalObject.cpp @@ -90,8 +90,22 @@ js::GlobalObject::getTypedObjectModule() const { return v.toObject().as(); } - - +/* static */ bool +GlobalObject::skipDeselectedConstructor(JSContext* cx, JSProtoKey key) +{ +#ifdef ENABLE_SHARED_ARRAY_BUFFER + // Return true if the given constructor has been disabled at run-time. + switch (key) { + case JSProto_Atomics: + case JSProto_SharedArrayBuffer: + return !cx->compartment()->creationOptions().getSharedMemoryAndAtomicsEnabled(); + default: + return false; + } +#else + return false; +#endif +} /* static */ bool GlobalObject::ensureConstructor(JSContext* cx, Handle global, JSProtoKey key) @@ -118,6 +132,9 @@ GlobalObject::resolveConstructor(JSContext* cx, Handle global, JS if (!init && !clasp) return true; // JSProto_Null or a compile-time-disabled feature. + if (skipDeselectedConstructor(cx, key)) + return true; + // Some classes have no init routine, which means that they're disabled at // compile-time. We could try to enforce that callers never pass such keys // to resolveConstructor, but that would cramp the style of consumers like @@ -417,6 +434,7 @@ GlobalObject::initSelfHostingBuiltins(JSContext* cx, Handle globa return InitBareBuiltinCtor(cx, global, JSProto_Array) && InitBareBuiltinCtor(cx, global, JSProto_TypedArray) && InitBareBuiltinCtor(cx, global, JSProto_Uint8Array) && + InitBareBuiltinCtor(cx, global, JSProto_Int32Array) && InitBareWeakMapCtor(cx, global) && InitStopIterationClass(cx, global) && InitSelfHostingCollectionIteratorFunctions(cx, global) && @@ -661,7 +679,10 @@ GlobalObject::getSelfHostedFunction(JSContext* cx, Handle global, HandlePropertyName selfHostedName, HandleAtom name, unsigned nargs, MutableHandleValue funVal) { - if (GlobalObject::maybeGetIntrinsicValue(cx, global, selfHostedName, funVal)) { + bool exists = false; + if (!GlobalObject::maybeGetIntrinsicValue(cx, global, selfHostedName, funVal, &exists)) + return false; + if (exists) { RootedFunction fun(cx, &funVal.toObject().as()); if (fun->atom() == name) return true; @@ -725,3 +746,18 @@ GlobalObject::addIntrinsicValue(JSContext* cx, Handle global, holder->setSlot(shape->slot(), value); return true; } + +/* static */ bool +GlobalObject::ensureModulePrototypesCreated(JSContext *cx, Handle global) +{ + if (global->getSlot(MODULE_PROTO).isUndefined()) { + MOZ_ASSERT(global->getSlot(IMPORT_ENTRY_PROTO).isUndefined() && + global->getSlot(EXPORT_ENTRY_PROTO).isUndefined()); + if (!js::InitModuleClasses(cx, global)) + return false; + } + MOZ_ASSERT(global->getSlot(MODULE_PROTO).isObject() && + global->getSlot(IMPORT_ENTRY_PROTO).isObject() && + global->getSlot(EXPORT_ENTRY_PROTO).isObject()); + return true; +} diff --git a/js/src/vm/GlobalObject.h b/js/src/vm/GlobalObject.h index a7f9ceca6f..eb64ca5fb0 100644 --- a/js/src/vm/GlobalObject.h +++ b/js/src/vm/GlobalObject.h @@ -164,6 +164,7 @@ class GlobalObject : public NativeObject MOZ_ASSERT(key <= JSProto_LIMIT); return getSlot(APPLICATION_SLOTS + key); } + static bool skipDeselectedConstructor(JSContext* cx, JSProtoKey key); static bool ensureConstructor(JSContext* cx, Handle global, JSProtoKey key); static bool resolveConstructor(JSContext* cx, Handle global, JSProtoKey key); static bool initBuiltinConstructor(JSContext* cx, Handle global, @@ -467,16 +468,39 @@ class GlobalObject : public NativeObject return getOrCreateObject(cx, DATE_TIME_FORMAT_PROTO, initDateTimeFormatProto); } + static bool ensureModulePrototypesCreated(JSContext *cx, Handle global); + + JSObject* maybeGetModulePrototype() { + Value value = getSlot(MODULE_PROTO); + return value.isUndefined() ? nullptr : &value.toObject(); + } + + JSObject* maybeGetImportEntryPrototype() { + Value value = getSlot(IMPORT_ENTRY_PROTO); + return value.isUndefined() ? nullptr : &value.toObject(); + } + + JSObject* maybeGetExportEntryPrototype() { + Value value = getSlot(EXPORT_ENTRY_PROTO); + return value.isUndefined() ? nullptr : &value.toObject(); + } + JSObject* getModulePrototype() { - return &getSlot(MODULE_PROTO).toObject(); + JSObject* proto = maybeGetModulePrototype(); + MOZ_ASSERT(proto); + return proto; } JSObject* getImportEntryPrototype() { - return &getSlot(IMPORT_ENTRY_PROTO).toObject(); + JSObject* proto = maybeGetImportEntryPrototype(); + MOZ_ASSERT(proto); + return proto; } JSObject* getExportEntryPrototype() { - return &getSlot(EXPORT_ENTRY_PROTO).toObject(); + JSObject* proto = maybeGetExportEntryPrototype(); + MOZ_ASSERT(proto); + return proto; } static JSFunction* @@ -600,7 +624,7 @@ class GlobalObject : public NativeObject static bool maybeGetIntrinsicValue(JSContext* cx, Handle global, Handle name, - MutableHandleValue vp) + MutableHandleValue vp, bool* exists) { NativeObject* holder = getIntrinsicsHolder(cx, global); if (!holder) @@ -608,15 +632,21 @@ class GlobalObject : public NativeObject if (Shape* shape = holder->lookupPure(name)) { vp.set(holder->getSlot(shape->slot())); - return true; + *exists = true; + } else { + *exists = false; } - return false; + + return true; } static bool getIntrinsicValue(JSContext* cx, Handle global, HandlePropertyName name, MutableHandleValue value) { - if (GlobalObject::maybeGetIntrinsicValue(cx, global, name, value)) + bool exists = false; + if (!GlobalObject::maybeGetIntrinsicValue(cx, global, name, value, &exists)) + return false; + if (exists) return true; if (!cx->runtime()->cloneSelfHostedValue(cx, name, value)) return false; diff --git a/js/src/vm/HelperThreads.cpp b/js/src/vm/HelperThreads.cpp index b3203a98e3..b7030aa8c8 100644 --- a/js/src/vm/HelperThreads.cpp +++ b/js/src/vm/HelperThreads.cpp @@ -195,10 +195,10 @@ static const JSClass parseTaskGlobalClass = { JS_GlobalObjectTraceHook }; -ParseTask::ParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, JSContext* initCx, - const char16_t* chars, size_t length, +ParseTask::ParseTask(ParseTaskKind kind, ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + JSContext* initCx, const char16_t* chars, size_t length, JS::OffThreadCompileCallback callback, void* callbackData) - : cx(cx), options(initCx), chars(chars), length(length), + : kind(kind), cx(cx), options(initCx), chars(chars), length(length), alloc(JSRuntime::TEMP_LIFO_ALLOC_PRIMARY_CHUNK_SIZE), exclusiveContextGlobal(initCx->runtime(), exclusiveContextGlobal), callback(callback), callbackData(callbackData), @@ -244,6 +244,52 @@ ParseTask::~ParseTask() js_delete(errors[i]); } +ScriptParseTask::ScriptParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + JSContext* initCx, const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData) + : ParseTask(ParseTaskKind::Script, cx, exclusiveContextGlobal, initCx, chars, length, callback, + callbackData) +{ +} + +void +ScriptParseTask::parse() +{ + SourceBufferHolder srcBuf(chars, length, SourceBufferHolder::NoOwnership); + + // ! WARNING WARNING WARNING ! + // + // See comment in Parser::bindLexical about optimizing global lexical + // bindings. If we start optimizing them, passing in task->cx's + // global lexical scope would be incorrect! + // + // ! WARNING WARNING WARNING ! + Rooted globalLexical(cx, &cx->global()->lexicalScope()); + Rooted staticScope(cx, &globalLexical->staticBlock()); + script = frontend::CompileScript(cx, &alloc, globalLexical, staticScope, nullptr, + options, srcBuf, + /* source_ = */ nullptr, + /* extraSct = */ nullptr, + /* sourceObjectOut = */ sourceObject.address()); +} + +ModuleParseTask::ModuleParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + JSContext* initCx, const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData) + : ParseTask(ParseTaskKind::Module, cx, exclusiveContextGlobal, initCx, chars, length, callback, + callbackData) +{ +} + +void +ModuleParseTask::parse() +{ + SourceBufferHolder srcBuf(chars, length, SourceBufferHolder::NoOwnership); + ModuleObject* module = frontend::CompileModule(cx, options, srcBuf, &alloc); + if (module) + script = module->script(); +} + void js::CancelOffThreadParses(JSRuntime* rt) { @@ -285,7 +331,8 @@ js::CancelOffThreadParses(JSRuntime* rt) if (task->runtimeMatches(rt)) { found = true; AutoUnlockHelperThreadState unlock; - HelperThreadState().finishParseTask(/* maybecx = */ nullptr, rt, task); + HelperThreadState().finishParseTask(/* maybecx = */ nullptr, rt, task->kind, + task); } } if (!found) @@ -319,7 +366,7 @@ EnsureConstructor(JSContext* cx, Handle global, JSProtoKey key) // Initialize all classes potentially created during parsing for use in parser // data structures, template objects, &c. static bool -EnsureParserCreatedClasses(JSContext* cx) +EnsureParserCreatedClasses(JSContext* cx, ParseTaskKind kind) { Handle global = cx->global(); @@ -338,18 +385,15 @@ EnsureParserCreatedClasses(JSContext* cx) if (!GlobalObject::initStarGenerators(cx, global)) return false; // needed by function*() {} and generator comprehensions + if (kind == ParseTaskKind::Module && !GlobalObject::ensureModulePrototypesCreated(cx, global)) + return false; + return true; } -bool -js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options, - const char16_t* chars, size_t length, - JS::OffThreadCompileCallback callback, void* callbackData) +static JSObject* +CreateGlobalForOffThreadParse(JSContext* cx, ParseTaskKind kind, const gc::AutoSuppressGC& nogc) { - // Suppress GC so that calls below do not trigger a new incremental GC - // which could require barriers on the atoms compartment. - gc::AutoSuppressGC suppress(cx); - JSCompartment* currentCompartment = cx->compartment(); JS::CompartmentOptions compartmentOptions(currentCompartment->creationOptions(), @@ -367,21 +411,60 @@ js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& optio JSObject* global = JS_NewGlobalObject(cx, &parseTaskGlobalClass, nullptr, JS::FireOnNewGlobalHook, compartmentOptions); if (!global) - return false; + return nullptr; JS_SetCompartmentPrincipals(global->compartment(), currentCompartment->principals()); // Initialize all classes required for parsing while still on the main // thread, for both the target and the new global so that prototype // pointers can be changed infallibly after parsing finishes. - if (!EnsureParserCreatedClasses(cx)) - return false; + if (!EnsureParserCreatedClasses(cx, kind)) + return nullptr; { AutoCompartment ac(cx, global); - if (!EnsureParserCreatedClasses(cx)) - return false; + if (!EnsureParserCreatedClasses(cx, kind)) + return nullptr; } + return global; +} + +static bool +QueueOffThreadParseTask(JSContext* cx, ParseTask* task) +{ + if (OffThreadParsingMustWaitForGC(cx->runtime())) { + AutoLockHelperThreadState lock; + if (!HelperThreadState().parseWaitingOnGC().append(task)) { + ReportOutOfMemory(cx); + return false; + } + } else { + AutoLockHelperThreadState lock; + if (!HelperThreadState().parseWorklist().append(task)) { + ReportOutOfMemory(cx); + return false; + } + + task->activate(cx->runtime()); + HelperThreadState().notifyOne(GlobalHelperThreadState::PRODUCER); + } + + return true; +} + +bool +js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options, + const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData) +{ + // Suppress GC so that calls below do not trigger a new incremental GC + // which could require barriers on the atoms compartment. + gc::AutoSuppressGC nogc(cx); + + JSObject* global = CreateGlobalForOffThreadParse(cx, ParseTaskKind::Script, nogc); + if (!global) + return false; + ScopedJSDeletePtr helpercx( cx->new_(cx->runtime(), (PerThreadData*) nullptr, ExclusiveContext::Context_Exclusive)); @@ -389,32 +472,50 @@ js::StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& optio return false; ScopedJSDeletePtr task( - cx->new_(helpercx.get(), global, cx, chars, length, - callback, callbackData)); + cx->new_(helpercx.get(), global, cx, chars, length, + callback, callbackData)); if (!task) return false; helpercx.forget(); - if (!task->init(cx, options)) + if (!task->init(cx, options) || !QueueOffThreadParseTask(cx, task)) return false; - if (OffThreadParsingMustWaitForGC(cx->runtime())) { - AutoLockHelperThreadState lock; - if (!HelperThreadState().parseWaitingOnGC().append(task.get())) { - ReportOutOfMemory(cx); - return false; - } - } else { - AutoLockHelperThreadState lock; - if (!HelperThreadState().parseWorklist().append(task.get())) { - ReportOutOfMemory(cx); - return false; - } + task.forget(); - task->activate(cx->runtime()); - HelperThreadState().notifyOne(GlobalHelperThreadState::PRODUCER); - } + return true; +} + +bool +js::StartOffThreadParseModule(JSContext* cx, const ReadOnlyCompileOptions& options, + const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData) +{ + // Suppress GC so that calls below do not trigger a new incremental GC + // which could require barriers on the atoms compartment. + gc::AutoSuppressGC nogc(cx); + + JSObject* global = CreateGlobalForOffThreadParse(cx, ParseTaskKind::Module, nogc); + if (!global) + return false; + + ScopedJSDeletePtr helpercx( + cx->new_(cx->runtime(), (PerThreadData*) nullptr, + ExclusiveContext::Context_Exclusive)); + if (!helpercx) + return false; + + ScopedJSDeletePtr task( + cx->new_(helpercx.get(), global, cx, chars, length, + callback, callbackData)); + if (!task) + return false; + + helpercx.forget(); + + if (!task->init(cx, options) || !QueueOffThreadParseTask(cx, task)) + return false; task.forget(); @@ -1013,7 +1114,8 @@ LeaveParseTaskZone(JSRuntime* rt, ParseTask* task) } JSScript* -GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void* token) +GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, ParseTaskKind kind, + void* token) { ScopedJSDeletePtr parseTask; @@ -1031,6 +1133,7 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void } } MOZ_ASSERT(parseTask); + MOZ_ASSERT(parseTask->kind == kind); if (!maybecx) { LeaveParseTaskZone(rt, parseTask); @@ -1043,7 +1146,7 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void // Make sure we have all the constructors we need for the prototype // remapping below, since we can't GC while that's happening. Rooted global(cx, &cx->global()->as()); - if (!EnsureParserCreatedClasses(cx)) { + if (!EnsureParserCreatedClasses(cx, kind)) { LeaveParseTaskZone(rt, parseTask); return nullptr; } @@ -1089,6 +1192,34 @@ GlobalHelperThreadState::finishParseTask(JSContext* maybecx, JSRuntime* rt, void return script; } +JSScript* +GlobalHelperThreadState::finishScriptParseTask(JSContext* maybecx, JSRuntime* rt, void* token) +{ + JSScript* script = finishParseTask(maybecx, rt, ParseTaskKind::Script, token); + MOZ_ASSERT_IF(script, script->isGlobalCode()); + return script; +} + +JSObject* +GlobalHelperThreadState::finishModuleParseTask(JSContext* maybecx, JSRuntime* rt, void* token) +{ + JSScript* script = finishParseTask(maybecx, rt, ParseTaskKind::Module, token); + if (!script) + return nullptr; + + MOZ_ASSERT(script->module()); + if (!maybecx) + return nullptr; + + JSContext* cx = maybecx; + RootedModuleObject module(cx, script->module()); + module->fixScopesAfterCompartmentMerge(cx); + if (!ModuleObject::FreezeArrayProperties(cx, module)) + return nullptr; + + return module; +} + JSObject* GlobalObject::getStarGeneratorFunctionPrototype() { @@ -1116,8 +1247,13 @@ GlobalHelperThreadState::mergeParseTaskCompartment(JSRuntime* rt, ParseTask* par // Generator functions don't have Function.prototype as prototype but a // different function object, so the IdentifyStandardPrototype trick // below won't work. Just special-case it. - JSObject* parseTaskStarGenFunctionProto = - parseTask->exclusiveContextGlobal->as().getStarGeneratorFunctionPrototype(); + GlobalObject* parseGlobal = &parseTask->exclusiveContextGlobal->as(); + JSObject* parseTaskStarGenFunctionProto = parseGlobal->getStarGeneratorFunctionPrototype(); + + // Module objects don't have standard prototypes either. + JSObject* moduleProto = parseGlobal->maybeGetModulePrototype(); + JSObject* importEntryProto = parseGlobal->maybeGetImportEntryPrototype(); + JSObject* exportEntryProto = parseGlobal->maybeGetExportEntryPrototype(); // Point the prototypes of any objects in the script's compartment to refer // to the corresponding prototype in the new compartment. This will briefly @@ -1132,21 +1268,24 @@ GlobalHelperThreadState::mergeParseTaskCompartment(JSRuntime* rt, ParseTask* par JSObject* protoObj = proto.toObject(); JSObject* newProto; - if (protoObj == parseTaskStarGenFunctionProto) { - newProto = global->getStarGeneratorFunctionPrototype(); - } else { - JSProtoKey key = JS::IdentifyStandardPrototype(protoObj); - if (key == JSProto_Null) - continue; - + JSProtoKey key = JS::IdentifyStandardPrototype(protoObj); + if (key != JSProto_Null) { MOZ_ASSERT(key == JSProto_Object || key == JSProto_Array || key == JSProto_Function || key == JSProto_RegExp || key == JSProto_Iterator); - newProto = GetBuiltinPrototypePure(global, key); + } else if (protoObj == parseTaskStarGenFunctionProto) { + newProto = global->getStarGeneratorFunctionPrototype(); + } else if (protoObj == moduleProto) { + newProto = global->getModulePrototype(); + } else if (protoObj == importEntryProto) { + newProto = global->getImportEntryPrototype(); + } else if (protoObj == exportEntryProto) { + newProto = global->getExportEntryPrototype(); + } else { + continue; } - MOZ_ASSERT(newProto); group->setProtoUnchecked(TaggedProto(newProto)); } } @@ -1387,25 +1526,7 @@ HelperThread::handleParseWorkload() AutoUnlockHelperThreadState unlock; PerThreadData::AutoEnterRuntime enter(threadData.ptr(), task->exclusiveContextGlobal->runtimeFromAnyThread()); - SourceBufferHolder srcBuf(task->chars, task->length, - SourceBufferHolder::NoOwnership); - - // ! WARNING WARNING WARNING ! - // - // See comment in Parser::bindLexical about optimizing global lexical - // bindings. If we start optimizing them, passing in task->cx's - // global lexical scope would be incorrect! - // - // ! WARNING WARNING WARNING ! - ExclusiveContext* parseCx = task->cx; - Rooted globalLexical(parseCx, &parseCx->global()->lexicalScope()); - Rooted staticScope(parseCx, &globalLexical->staticBlock()); - task->script = frontend::CompileScript(parseCx, &task->alloc, - globalLexical, staticScope, nullptr, - task->options, srcBuf, - /* source_ = */ nullptr, - /* extraSct = */ nullptr, - /* sourceObjectOut = */ task->sourceObject.address()); + task->parse(); } // The callback is invoked while we are still off the main thread. diff --git a/js/src/vm/HelperThreads.h b/js/src/vm/HelperThreads.h index 5eae2d21eb..76fccf8f56 100644 --- a/js/src/vm/HelperThreads.h +++ b/js/src/vm/HelperThreads.h @@ -37,6 +37,12 @@ namespace wasm { typedef Vector IonCompileTaskVector; } // namespace wasm +enum class ParseTaskKind +{ + Script, + Module +}; + // Per-process state for off thread work items. class GlobalHelperThreadState { @@ -219,6 +225,11 @@ class GlobalHelperThreadState return bool(numWasmFailedJobs); } + JSScript* finishParseTask(JSContext* maybecx, JSRuntime* rt, ParseTaskKind kind, void* token); + void mergeParseTaskCompartment(JSRuntime* rt, ParseTask* parseTask, + Handle global, + JSCompartment* dest); + private: /* * Number of wasm jobs that encountered failure for the active module. @@ -227,10 +238,8 @@ class GlobalHelperThreadState uint32_t numWasmFailedJobs; public: - JSScript* finishParseTask(JSContext* maybecx, JSRuntime* rt, void* token); - void mergeParseTaskCompartment(JSRuntime* rt, ParseTask* parseTask, - Handle global, - JSCompartment* dest); + JSScript* finishScriptParseTask(JSContext* maybecx, JSRuntime* rt, void* token); + JSObject* finishModuleParseTask(JSContext* maybecx, JSRuntime* rt, void* token); bool compressionInProgress(SourceCompressionTask* task); SourceCompressionTask* compressionTaskForSource(ScriptSource* ss); @@ -410,6 +419,11 @@ StartOffThreadParseScript(JSContext* cx, const ReadOnlyCompileOptions& options, const char16_t* chars, size_t length, JS::OffThreadCompileCallback callback, void* callbackData); +bool +StartOffThreadParseModule(JSContext* cx, const ReadOnlyCompileOptions& options, + const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData); + /* * Called at the end of GC to enqueue any Parse tasks that were waiting on an * atoms-zone GC to finish. @@ -463,6 +477,7 @@ class MOZ_RAII AutoUnlockHelperThreadState struct ParseTask { + ParseTaskKind kind; ExclusiveContext* cx; OwningCompileOptions options; const char16_t* chars; @@ -490,19 +505,36 @@ struct ParseTask bool overRecursed; bool outOfMemory; - ParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + ParseTask(ParseTaskKind kind, ExclusiveContext* cx, JSObject* exclusiveContextGlobal, JSContext* initCx, const char16_t* chars, size_t length, JS::OffThreadCompileCallback callback, void* callbackData); bool init(JSContext* cx, const ReadOnlyCompileOptions& options); void activate(JSRuntime* rt); + virtual void parse() = 0; bool finish(JSContext* cx); bool runtimeMatches(JSRuntime* rt) { return exclusiveContextGlobal->runtimeFromAnyThread() == rt; } - ~ParseTask(); + virtual ~ParseTask(); +}; + +struct ScriptParseTask : public ParseTask +{ + ScriptParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + JSContext* initCx, const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData); + void parse() override; +}; + +struct ModuleParseTask : public ParseTask +{ + ModuleParseTask(ExclusiveContext* cx, JSObject* exclusiveContextGlobal, + JSContext* initCx, const char16_t* chars, size_t length, + JS::OffThreadCompileCallback callback, void* callbackData); + void parse() override; }; // Return whether, if a new parse task was started, it would need to wait for diff --git a/js/src/vm/NativeObject.cpp b/js/src/vm/NativeObject.cpp index b2c01e91cf..06b1d6b70d 100644 --- a/js/src/vm/NativeObject.cpp +++ b/js/src/vm/NativeObject.cpp @@ -1345,6 +1345,11 @@ js::NativeDefineProperty(ExclusiveContext* cx, HandleNativeObject obj, HandleId if ((desc_.attributes() & JSPROP_RESOLVING) == 0) obj->as().markLengthOverridden(); } + if (JSID_IS_SYMBOL(id) && JSID_TO_SYMBOL(id) == cx->wellKnownSymbols().iterator) { + // Do same thing as .length for [@@iterator]. + if ((desc_.attributes() & JSPROP_RESOLVING) == 0) + obj->as().markIteratorOverridden(); + } } // 9.1.6.1 OrdinaryDefineOwnProperty steps 1-2. diff --git a/js/src/vm/SavedStacks.cpp b/js/src/vm/SavedStacks.cpp index 636a8c0046..0937c14c9f 100644 --- a/js/src/vm/SavedStacks.cpp +++ b/js/src/vm/SavedStacks.cpp @@ -1028,10 +1028,10 @@ SavedStacks::copyAsyncStack(JSContext* cx, HandleObject asyncStack, HandleString } void -SavedStacks::sweep(JSRuntime* rt) +SavedStacks::sweep() { frames.sweep(); - sweepPCLocationMap(); + pcLocationMap.sweep(); } void @@ -1303,24 +1303,6 @@ SavedStacks::createFrameFromLookup(JSContext* cx, SavedFrame::HandleLookup looku return frame; } -/* - * Remove entries from the table whose JSScript is being collected. - */ -void -SavedStacks::sweepPCLocationMap() -{ - for (PCLocationMap::Enum e(pcLocationMap); !e.empty(); e.popFront()) { - PCKey key = e.front().key(); - JSScript* script = key.script.get(); - if (IsAboutToBeFinalizedUnbarriered(&script)) { - e.removeFront(); - } else if (script != key.script.get()) { - key.script = script; - e.rekeyFront(key); - } - } -} - bool SavedStacks::getLocation(JSContext* cx, const FrameIter& iter, MutableHandle locationp) diff --git a/js/src/vm/SavedStacks.h b/js/src/vm/SavedStacks.h index 2abd9a383f..eae6b91fb1 100644 --- a/js/src/vm/SavedStacks.h +++ b/js/src/vm/SavedStacks.h @@ -167,7 +167,7 @@ class SavedStacks { bool saveCurrentStack(JSContext* cx, MutableHandleSavedFrame frame, unsigned maxFrameCount = 0); bool copyAsyncStack(JSContext* cx, HandleObject asyncStack, HandleString asyncCause, MutableHandleSavedFrame adoptedStack, unsigned maxFrameCount = 0); - void sweep(JSRuntime* rt); + void sweep(); void trace(JSTracer* trc); uint32_t count(); void clear(); @@ -220,28 +220,32 @@ class SavedStacks { struct PCKey { PCKey(JSScript* script, jsbytecode* pc) : script(script), pc(pc) { } - PreBarrieredScript script; - jsbytecode* pc; + RelocatablePtrScript script; + jsbytecode* pc; + + bool needsSweep() { return IsAboutToBeFinalized(&script); } }; public: struct LocationValue { LocationValue() : source(nullptr), line(0), column(0) { } LocationValue(JSAtom* source, size_t line, uint32_t column) - : source(source), - line(line), - column(column) + : source(source), line(line), column(column) { } - static void trace(LocationValue* self, JSTracer* trc) { self->trace(trc); } void trace(JSTracer* trc) { if (source) TraceEdge(trc, &source, "SavedStacks::LocationValue::source"); } - PreBarrieredAtom source; - size_t line; - uint32_t column; + bool needsSweep() { + MOZ_ASSERT(source); + return !IsAboutToBeFinalized(&source); + } + + RelocatablePtrAtom source; + size_t line; + uint32_t column; }; template @@ -264,8 +268,8 @@ class SavedStacks { private: struct PCLocationHasher : public DefaultHasher { - typedef PointerHasher ScriptPtrHasher; - typedef PointerHasher BytecodePtrHasher; + using ScriptPtrHasher = DefaultHasher; + using BytecodePtrHasher = DefaultHasher; static HashNumber hash(const PCKey& key) { return mozilla::AddToHash(ScriptPtrHasher::hash(key.script), @@ -273,15 +277,14 @@ class SavedStacks { } static bool match(const PCKey& l, const PCKey& k) { - return l.script == k.script && l.pc == k.pc; + return ScriptPtrHasher::match(l.script, k.script) && + BytecodePtrHasher::match(l.pc, k.pc); } }; - typedef HashMap PCLocationMap; - + using PCLocationMap = GCHashMap; PCLocationMap pcLocationMap; - void sweepPCLocationMap(); bool getLocation(JSContext* cx, const FrameIter& iter, MutableHandle locationp); }; diff --git a/js/src/vm/ScopeObject.cpp b/js/src/vm/ScopeObject.cpp index 7113746c2c..073dff8d4b 100644 --- a/js/src/vm/ScopeObject.cpp +++ b/js/src/vm/ScopeObject.cpp @@ -1479,6 +1479,13 @@ ScopeIter::settle() if (frame_ && frame_.isFunctionFrame() && frame_.callee()->needsCallObject() && !frame_.hasCallObj()) { + // At the very start of a script that starts with a lexical block, the + // static scope will be that block. Skip it if this is the case. + if (ssi_.type() == StaticScopeIter::Block) + incrementStaticScopeIter(); + + // We should now be at the function scope, so since we have no + // CallObject, skip that too. MOZ_ASSERT(ssi_.type() == StaticScopeIter::Function); incrementStaticScopeIter(); } @@ -1486,8 +1493,14 @@ ScopeIter::settle() // Check for trying to iterate a strict eval frame before the prologue has // created the CallObject. if (frame_ && frame_.isStrictEvalFrame() && !frame_.hasCallObj() && !ssi_.done()) { + // Eval frames always have their own lexical scope. Analogous to the + // function frame case above, if the script starts with a lexical + // block, the SSI could see 2 block scopes here. So skip between 1-2 + // static block scopes here. MOZ_ASSERT(ssi_.type() == StaticScopeIter::Block); incrementStaticScopeIter(); + if (ssi_.type() == StaticScopeIter::Block) + incrementStaticScopeIter(); MOZ_ASSERT(ssi_.type() == StaticScopeIter::Eval); MOZ_ASSERT(maybeStaticScope() == frame_.script()->enclosingStaticScope()); incrementStaticScopeIter(); diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 93d0fd7720..d2222c68ef 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -780,6 +780,64 @@ intrinsic_ArrayBufferCopyData(JSContext* cx, unsigned argc, Value* vp) return true; } +static bool +intrinsic_IsSpecificTypedArray(JSContext* cx, unsigned argc, Value* vp, Scalar::Type type) +{ + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 1); + MOZ_ASSERT(args[0].isObject()); + + JSObject* obj = &args[0].toObject(); + + bool isArray = JS_GetArrayBufferViewType(obj) == type; + + args.rval().setBoolean(isArray); + return true; +} + +static bool +intrinsic_IsUint8TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint8) || + intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint8Clamped); +} + +static bool +intrinsic_IsInt8TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int8); +} + +static bool +intrinsic_IsUint16TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint16); +} + +static bool +intrinsic_IsInt16TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int16); +} + +static bool +intrinsic_IsUint32TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Uint32); +} + +static bool +intrinsic_IsInt32TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Int32); +} + +static bool +intrinsic_IsFloat32TypedArray(JSContext* cx, unsigned argc, Value* vp) +{ + return intrinsic_IsSpecificTypedArray(cx, argc, vp, Scalar::Float32); +} + static bool intrinsic_IsPossiblyWrappedTypedArray(JSContext* cx, unsigned argc, Value* vp) { @@ -1327,6 +1385,34 @@ CallNonGenericSelfhostedMethod(JSContext* cx, unsigned argc, Value* vp) return CallNonGenericMethod(cx, args); } +bool +js::IsCallSelfHostedNonGenericMethod(NativeImpl impl) +{ + return impl == CallSelfHostedNonGenericMethod; +} + +bool +js::ReportIncompatibleSelfHostedMethod(JSContext* cx, const CallArgs& args) +{ + // The contract for this function is the same as CallSelfHostedNonGenericMethod. + // The normal ReportIncompatible function doesn't work for selfhosted functions, + // because they always call the different CallXXXMethodIfWrapped methods, + // which would be reported as the called function instead. + + // Lookup the selfhosted method that was invoked. + ScriptFrameIter iter(cx); + MOZ_ASSERT(iter.isFunctionFrame()); + + JSAutoByteString funNameBytes; + if (const char* funName = GetFunctionNameBytes(cx, iter.callee(cx), &funNameBytes)) { + JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_METHOD, + funName, "method", InformalValueTypeName(args.thisv())); + } + + return false; +} + + /** * Returns the default locale as a well-formed, but not necessarily canonicalized, * BCP-47 language tag. @@ -1768,6 +1854,13 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("ArrayBufferByteLength", intrinsic_ArrayBufferByteLength, 1,0), JS_FN("ArrayBufferCopyData", intrinsic_ArrayBufferCopyData, 4,0), + JS_FN("IsUint8TypedArray", intrinsic_IsUint8TypedArray, 1,0), + JS_FN("IsInt8TypedArray", intrinsic_IsInt8TypedArray, 1,0), + JS_FN("IsUint16TypedArray", intrinsic_IsUint16TypedArray, 1,0), + JS_FN("IsInt16TypedArray", intrinsic_IsInt16TypedArray, 1,0), + JS_FN("IsUint32TypedArray", intrinsic_IsUint32TypedArray, 1,0), + JS_FN("IsInt32TypedArray", intrinsic_IsInt32TypedArray, 1,0), + JS_FN("IsFloat32TypedArray", intrinsic_IsFloat32TypedArray, 1,0), JS_INLINABLE_FN("IsTypedArray", intrinsic_IsInstanceOfBuiltin, 1,0, IntrinsicIsTypedArray), diff --git a/js/src/vm/SelfHosting.h b/js/src/vm/SelfHosting.h index dcd18708a5..7e2055469d 100644 --- a/js/src/vm/SelfHosting.h +++ b/js/src/vm/SelfHosting.h @@ -8,6 +8,7 @@ #define vm_SelfHosting_h_ #include "jsapi.h" +#include "NamespaceImports.h" class JSAtom; @@ -17,7 +18,14 @@ namespace js { * Check whether the given JSFunction is a self-hosted function whose * self-hosted name is the given name. */ -bool IsSelfHostedFunctionWithName(JSFunction* fun, JSAtom* name); +bool +IsSelfHostedFunctionWithName(JSFunction* fun, JSAtom* name); + +bool +IsCallSelfHostedNonGenericMethod(NativeImpl impl); + +bool +ReportIncompatibleSelfHostedMethod(JSContext* cx, const CallArgs& args); /* Get the compile options used when compiling self hosted code. */ void diff --git a/js/src/vm/SharedArrayObject.cpp b/js/src/vm/SharedArrayObject.cpp index 9dfcf13aef..afdb739350 100644 --- a/js/src/vm/SharedArrayObject.cpp +++ b/js/src/vm/SharedArrayObject.cpp @@ -164,15 +164,15 @@ SharedArrayRawBuffer::New(JSContext* cx, uint32_t length) void SharedArrayRawBuffer::addReference() { - MOZ_ASSERT(this->refcount > 0); - ++this->refcount; // Atomic. + MOZ_ASSERT(this->refcount_ > 0); + ++this->refcount_; // Atomic. } void SharedArrayRawBuffer::dropReference() { // Drop the reference to the buffer. - uint32_t refcount = --this->refcount; // Atomic. + uint32_t refcount = --this->refcount_; // Atomic. // If this was the final reference, release the buffer. if (refcount == 0) { @@ -330,7 +330,15 @@ SharedArrayBufferObject::Finalize(FreeOp* fop, JSObject* obj) SharedArrayBufferObject::addSizeOfExcludingThis(JSObject* obj, mozilla::MallocSizeOf mallocSizeOf, JS::ClassInfo* info) { - info->objectsNonHeapElementsMapped += obj->as().byteLength(); + // Divide the buffer size by the refcount to get the fraction of the buffer + // owned by this thread. It's conceivable that the refcount might change in + // the middle of memory reporting, in which case the amount reported for + // some threads might be to high (if the refcount goes up) or too low (if + // the refcount goes down). But that's unlikely and hard to avoid, so we + // just live with the risk. + const SharedArrayBufferObject& buf = obj->as(); + info->objectsNonHeapElementsShared += + buf.byteLength() / buf.rawBufferObject()->refcount(); } const Class SharedArrayBufferObject::protoClass = { diff --git a/js/src/vm/SharedArrayObject.h b/js/src/vm/SharedArrayObject.h index a2db1040a5..7e7b15f96d 100644 --- a/js/src/vm/SharedArrayObject.h +++ b/js/src/vm/SharedArrayObject.h @@ -44,7 +44,7 @@ class FutexWaiter; class SharedArrayRawBuffer { private: - mozilla::Atomic refcount; + mozilla::Atomic refcount_; uint32_t length; // A list of structures representing tasks waiting on some @@ -53,7 +53,7 @@ class SharedArrayRawBuffer protected: SharedArrayRawBuffer(uint8_t* buffer, uint32_t length) - : refcount(1), + : refcount_(1), length(length), waiters_(nullptr) { @@ -84,6 +84,8 @@ class SharedArrayRawBuffer return length; } + uint32_t refcount() const { return refcount_; } + void addReference(); void dropReference(); }; diff --git a/js/src/vm/Stack.cpp b/js/src/vm/Stack.cpp index cf06be729b..727e24176b 100644 --- a/js/src/vm/Stack.cpp +++ b/js/src/vm/Stack.cpp @@ -18,6 +18,7 @@ #include "js/GCAPI.h" #include "vm/Debugger.h" #include "vm/Opcodes.h" +#include "vm/ScopeObject.h" #include "jit/JitFrameIterator-inl.h" #include "vm/Interpreter-inl.h" diff --git a/js/xpconnect/src/XPCJSRuntime.cpp b/js/xpconnect/src/XPCJSRuntime.cpp index 94d19cfac6..1395ee89ad 100644 --- a/js/xpconnect/src/XPCJSRuntime.cpp +++ b/js/xpconnect/src/XPCJSRuntime.cpp @@ -2166,10 +2166,10 @@ ReportClassStats(const ClassInfo& classInfo, const nsACString& path, "Non-fixed object slots."); } - if (classInfo.objectsMallocHeapElementsNonAsmJS > 0) { - REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/malloc-heap/elements/non-asm.js"), - KIND_HEAP, classInfo.objectsMallocHeapElementsNonAsmJS, - "Non-asm.js indexed elements."); + if (classInfo.objectsMallocHeapElementsNormal > 0) { + REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/malloc-heap/elements/normal"), + KIND_HEAP, classInfo.objectsMallocHeapElementsNormal, + "Normal (non-asm.js) indexed elements."); } // asm.js arrays are heap-allocated on some platforms and @@ -2192,10 +2192,18 @@ ReportClassStats(const ClassInfo& classInfo, const nsACString& path, "the GC heap."); } - if (classInfo.objectsNonHeapElementsMapped > 0) { - REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/mapped"), - KIND_NONHEAP, classInfo.objectsNonHeapElementsMapped, - "Memory-mapped array buffer elements."); + if (classInfo.objectsNonHeapElementsNormal > 0) { + REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/normal"), + KIND_NONHEAP, classInfo.objectsNonHeapElementsNormal, + "Memory-mapped non-shared array buffer elements."); + } + + if (classInfo.objectsNonHeapElementsShared > 0) { + REPORT_BYTES(path + NS_LITERAL_CSTRING("objects/non-heap/elements/shared"), + KIND_NONHEAP, classInfo.objectsNonHeapElementsShared, + "Memory-mapped shared array buffer elements. These elements are " + "shared between one or more runtimes; the reported size is divided " + "by the buffer's refcount."); } if (classInfo.objectsNonHeapCodeAsmJS > 0) { diff --git a/layout/base/FramePropertyTable.cpp b/layout/base/FramePropertyTable.cpp index 12a1dcc3de..0fd9b1c377 100644 --- a/layout/base/FramePropertyTable.cpp +++ b/layout/base/FramePropertyTable.cpp @@ -123,7 +123,10 @@ FramePropertyTable::RemoveInternal( if (entry->mProp.mProperty == aProperty) { // There's only one entry and it's the one we want void* value = entry->mProp.mValue; - mEntries.RawRemoveEntry(entry); + + // Here it's ok to use RemoveEntry() -- which may resize mEntries -- + // because we null mLastEntry at the same time. + mEntries.RemoveEntry(entry); mLastEntry = nullptr; if (aFoundResult) { *aFoundResult = true; @@ -209,6 +212,9 @@ FramePropertyTable::DeleteAllFor(const nsIFrame* aFrame) } DeleteAllForEntry(entry); + + // mLastEntry points into mEntries, so we use RawRemoveEntry() which will not + // resize mEntries. mEntries.RawRemoveEntry(entry); } diff --git a/layout/base/FramePropertyTable.h b/layout/base/FramePropertyTable.h index 9688a8f159..8c0a06c1f6 100644 --- a/layout/base/FramePropertyTable.h +++ b/layout/base/FramePropertyTable.h @@ -347,6 +347,9 @@ protected: static void DeleteAllForEntry(Entry* aEntry); + // Note that mLastEntry points into mEntries, so we need to be careful about + // not triggering a resize of mEntries, e.g. use RawRemoveEntry() instead of + // RemoveEntry() in some places. nsTHashtable mEntries; const nsIFrame* mLastFrame; Entry* mLastEntry; diff --git a/layout/inspector/inDOMUtils.cpp b/layout/inspector/inDOMUtils.cpp index 6dc81e5825..bb0c1d6538 100644 --- a/layout/inspector/inDOMUtils.cpp +++ b/layout/inspector/inDOMUtils.cpp @@ -1194,8 +1194,9 @@ inDOMUtils::GetCSSPseudoElementNames(uint32_t* aLength, char16_t*** aNames) { nsTArray array; - const uint8_t pseudoCount = static_cast(CSSPseudoElementType::Count); - for (uint8_t i = 0; i < pseudoCount; ++i) { + const CSSPseudoElementTypeBase pseudoCount = + static_cast(CSSPseudoElementType::Count); + for (CSSPseudoElementTypeBase i = 0; i < pseudoCount; ++i) { CSSPseudoElementType type = static_cast(i); if (!nsCSSPseudoElements::PseudoElementIsUASheetOnly(type)) { nsIAtom* atom = nsCSSPseudoElements::GetPseudoAtom(type); diff --git a/layout/media/symbols.def.in b/layout/media/symbols.def.in index b50fda2135..a828b4048e 100644 --- a/layout/media/symbols.def.in +++ b/layout/media/symbols.def.in @@ -664,6 +664,7 @@ BrotliDecoderDecompress BrotliDecoderStateInit BrotliDecoderStateCleanup BrotliDecoderDecompressStream +BrotliDecoderIsFinished #ifdef MOZ_WEBRTC downmix_int #ifdef MOZ_SAMPLE_TYPE_FLOAT32 diff --git a/layout/reftests/bidi/613157-2-ref.html b/layout/reftests/bidi/613157-2-ref.html index e5c64211ef..a41becbea8 100644 --- a/layout/reftests/bidi/613157-2-ref.html +++ b/layout/reftests/bidi/613157-2-ref.html @@ -5,6 +5,6 @@ Inline blocks shouldn't end the paragraph -

‮אני אוהב--> 4 xoferiF-->8 ימים בשבוע

+

‮ימים בשבוע 8<-- 4 xoferiF<--אני אוהב

diff --git a/layout/reftests/bidi/698706-1-ref.html b/layout/reftests/bidi/698706-1-ref.html index cef33b4c03..a5fb48020e 100644 --- a/layout/reftests/bidi/698706-1-ref.html +++ b/layout/reftests/bidi/698706-1-ref.html @@ -5,6 +5,6 @@ Facebook -
Winnie the Pooh commented on his own קישור on Christopher Robin's wall: "Lorem ipsum dolor sit amet"
+
Winnie the Pooh commented on his own קישור on Christopher Robin's wall: "Lorem ipsum dolor sit amet"
diff --git a/layout/reftests/bidi/83958-2c.html b/layout/reftests/bidi/83958-2c.html deleted file mode 100644 index 91675b7fba..0000000000 --- a/layout/reftests/bidi/83958-2c.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - - Bidirectional Text Test 2 - HTML - - - - -

אבג

-

אבג

-

אבג

-

אבג ABC דהו

-

אבג ABC דהו

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF זחט

-

אבג ABC דהו DEF GHI זחט

-

אבג ABC דהו DEF GHI זחט

-

אבג ABC דהו DEF GHI זחט

- - diff --git a/layout/reftests/bidi/reftest.list b/layout/reftests/bidi/reftest.list index ed3053da35..f467c61829 100644 --- a/layout/reftests/bidi/reftest.list +++ b/layout/reftests/bidi/reftest.list @@ -52,7 +52,6 @@ random-if(cocoaWidget) == with-first-letter-2b.html with-first-letter-2-ref.html == 83958-1c.html 83958-1-ref.html fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2a.html 83958-2-ref.html fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2b.html 83958-2-ref.html -fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azureSkia,111,7) == 83958-2c.html 83958-2-ref.html == 115921-1.html 115921-1-ref.html == 115921-2.html 115921-2-ref.html == 151407-1.html 151407-1-ref.html @@ -74,7 +73,7 @@ fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)&&!layersGPUAccelerated&&!azur == 263359-1a.html 263359-1-ref.html != 263359-1b.html 263359-1-ref.html == 263359-2.html 263359-2-ref.html -== 263359-3.html 263359-3-ref.html +fails == 263359-3.html 263359-3-ref.html # bug 1230455 == 263359-4.html 263359-4-ref.html random-if(winWidget) == 267459-1.html 267459-1-ref.html # depends on windows version, see bug 590101 == 267459-2.html 267459-2-ref.html diff --git a/layout/style/StyleRule.cpp b/layout/style/StyleRule.cpp index d3f2c12ee4..a04a7bdaac 100644 --- a/layout/style/StyleRule.cpp +++ b/layout/style/StyleRule.cpp @@ -314,11 +314,9 @@ nsCSSSelector::nsCSSSelector(void) mNext(nullptr), mNameSpace(kNameSpaceID_Unknown), mOperator(0), - mPseudoType(static_cast(CSSPseudoElementType::NotPseudo)) + mPseudoType(CSSPseudoElementType::NotPseudo) { MOZ_COUNT_CTOR(nsCSSSelector); - static_assert(static_cast(CSSPseudoElementType::MAX) < INT16_MAX, - "CSSPseudoElementType values overflow mPseudoType"); } nsCSSSelector* @@ -333,7 +331,7 @@ nsCSSSelector::Clone(bool aDeepNext, bool aDeepNegations) const result->mCasedTag = mCasedTag; result->mOperator = mOperator; result->mPseudoType = mPseudoType; - + NS_IF_CLONE(mIDList); NS_IF_CLONE(mClassList); NS_IF_CLONE(mPseudoClassList); diff --git a/layout/style/StyleRule.h b/layout/style/StyleRule.h index 4fb1bf8e3d..e0144f4f0a 100644 --- a/layout/style/StyleRule.h +++ b/layout/style/StyleRule.h @@ -25,6 +25,7 @@ class nsIAtom; struct nsCSSSelectorList; namespace mozilla { +enum class CSSPseudoElementType : uint8_t; class CSSStyleSheet; } // namespace mozilla @@ -208,11 +209,9 @@ private: public: // Get and set the selector's pseudo type - mozilla::CSSPseudoElementType PseudoType() const { - return static_cast(mPseudoType); - } + mozilla::CSSPseudoElementType PseudoType() const { return mPseudoType; } void SetPseudoType(mozilla::CSSPseudoElementType aType) { - mPseudoType = static_cast(aType); + mPseudoType = aType; } size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; @@ -233,8 +232,9 @@ public: int32_t mNameSpace; char16_t mOperator; private: - // int16_t to make sure it packs well with mOperator - int16_t mPseudoType; + // The underlying type of CSSPseudoElementType is uint8_t and + // it packs well with mOperator. (char16_t + uint8_t is less than 32bits.) + mozilla::CSSPseudoElementType mPseudoType; nsCSSSelector(const nsCSSSelector& aCopy) = delete; nsCSSSelector& operator=(const nsCSSSelector& aCopy) = delete; diff --git a/layout/style/html.css b/layout/style/html.css index 16f39c847b..0f3bdc6986 100644 --- a/layout/style/html.css +++ b/layout/style/html.css @@ -7,13 +7,14 @@ /* bidi */ +[dir] { + unicode-bidi: -moz-isolate; +} [dir="rtl"] { direction: rtl; - unicode-bidi: embed; } [dir="ltr"] { direction: ltr; - unicode-bidi: embed; } bdi:-moz-dir(ltr), [dir="auto"]:-moz-dir(ltr) { direction: ltr; } @@ -30,62 +31,62 @@ bdi:-moz-dir(rtl), [dir="auto"]:-moz-dir(rtl) { direction: rtl; } * and the rules in http://dev.w3.org/html5/spec/rendering.html#rendering */ -address, address[dir], -article, article[dir], -aside, aside[dir], -blockquote, blockquote[dir], -body, body[dir], -caption, caption[dir], -center, center[dir], -col, col[dir], -colgroup, colgroup[dir], -dd, dd[dir], -dir, dir[dir], -div, div[dir], -dl, dl[dir], -dt, dt[dir], -fieldset, fieldset[dir], -figcaption, figcaption[dir], -figure, figure[dir], -footer, footer[dir], -form, form[dir], -h1, h1[dir], -h2, h2[dir], -h3, h3[dir], -h4, h4[dir], -h5, h5[dir], -h6, h6[dir], -header, header[dir], -hgroup, hgroup[dir], -hr, hr[dir], -html, html[dir], -legend, legend[dir], -li, li[dir], -listing, listing[dir], -main, main[dir], -marquee, marquee[dir], -menu, menu[dir], -nav, nav[dir], -noframes, noframes[dir], -ol, ol[dir], -p, p[dir], -plaintext, plaintext[dir], -pre, pre[dir], -section, section[dir], -summary, summary[dir], -table, table[dir], -tbody, tbody[dir], -td, td[dir], -tfoot, tfoot[dir], -th, th[dir], -thead, thead[dir], -tr, tr[dir], -ul, ul[dir], -xmp, xmp[dir] { +address, +article, +aside, +blockquote, +body, +caption, +center, +col, +colgroup, +dd, +dir, +div, +dl, +dt, +fieldset, +figcaption, +figure, +footer, +form, +h1, +h2, +h3, +h4, +h5, +h6, +header, +hgroup, +hr, +html, +legend, +li, +listing, +main, +marquee, +menu, +nav, +noframes, +ol, +p, +plaintext, +pre, +section, +summary, +table, +tbody, +td, +tfoot, +th, +thead, +tr, +ul, +xmp { unicode-bidi: -moz-isolate; } -bdi, bdi[dir], output, output[dir], [dir="auto"] { +bdi, output { unicode-bidi: -moz-isolate; } bdo, bdo[dir] { @@ -724,7 +725,7 @@ area { display: none ! important; } -iframe:-moz-full-screen { +iframe:fullscreen { /* iframes in full-screen mode don't show a border. */ border: none !important; padding: 0 !important; diff --git a/layout/style/nsCSSPseudoClassList.h b/layout/style/nsCSSPseudoClassList.h index ed8650b4f1..5a73f71b82 100644 --- a/layout/style/nsCSSPseudoClassList.h +++ b/layout/style/nsCSSPseudoClassList.h @@ -163,6 +163,7 @@ CSS_STATE_PSEUDO_CLASS(mozDevtoolsHighlighted, ":-moz-devtools-highlighted", 0, // Matches the element which is being displayed full-screen, and // any containing frames. +CSS_STATE_PSEUDO_CLASS(fullscreen, ":fullscreen", 0, "", NS_EVENT_STATE_FULL_SCREEN) CSS_STATE_PSEUDO_CLASS(mozFullScreen, ":-moz-full-screen", 0, "", NS_EVENT_STATE_FULL_SCREEN) // Matches any element which is an ancestor of the DOM full-screen element, diff --git a/layout/style/nsCSSPseudoElements.cpp b/layout/style/nsCSSPseudoElements.cpp index 7f5d85e39b..3323c5412c 100644 --- a/layout/style/nsCSSPseudoElements.cpp +++ b/layout/style/nsCSSPseudoElements.cpp @@ -76,7 +76,9 @@ nsCSSPseudoElements::IsCSS2PseudoElement(nsIAtom *aAtom) /* static */ CSSPseudoElementType nsCSSPseudoElements::GetPseudoType(nsIAtom *aAtom) { - for (uint8_t i = 0; i < ArrayLength(CSSPseudoElements_info); ++i) { + for (CSSPseudoElementTypeBase i = 0; + i < ArrayLength(CSSPseudoElements_info); + ++i) { if (*CSSPseudoElements_info[i].mAtom == aAtom) { return static_cast(i); } @@ -99,13 +101,14 @@ nsCSSPseudoElements::GetPseudoType(nsIAtom *aAtom) nsCSSPseudoElements::GetPseudoAtom(Type aType) { NS_ASSERTION(aType < Type::Count, "Unexpected type"); - return *CSSPseudoElements_info[static_cast(aType)].mAtom; + return *CSSPseudoElements_info[ + static_cast(aType)].mAtom; } /* static */ uint32_t nsCSSPseudoElements::FlagsForPseudoElement(const Type aType) { - uint8_t index = static_cast(aType); + CSSPseudoElementTypeBase index = static_cast(aType); NS_ASSERTION(index < ArrayLength(CSSPseudoElements_flags), "argument must be a pseudo-element"); return CSSPseudoElements_flags[index]; diff --git a/layout/style/nsCSSPseudoElements.h b/layout/style/nsCSSPseudoElements.h index 8d7ec5c6cf..a3f7c16aa7 100644 --- a/layout/style/nsCSSPseudoElements.h +++ b/layout/style/nsCSSPseudoElements.h @@ -39,7 +39,8 @@ namespace mozilla { // The total count of CSSPseudoElement is less than 256, // so use uint8_t as its underlying type. -enum class CSSPseudoElementType : uint8_t { +typedef uint8_t CSSPseudoElementTypeBase; +enum class CSSPseudoElementType : CSSPseudoElementTypeBase { // If the actual pseudo-elements stop being first here, change // GetPseudoType. #define CSS_PSEUDO_ELEMENT(_name, _value_, _flags) \ diff --git a/layout/style/nsCSSRuleProcessor.cpp b/layout/style/nsCSSRuleProcessor.cpp index c167e22b05..1401299ab7 100644 --- a/layout/style/nsCSSRuleProcessor.cpp +++ b/layout/style/nsCSSRuleProcessor.cpp @@ -910,8 +910,8 @@ struct RuleCascadeData { size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const; RuleHash mRuleHash; - RuleHash* - mPseudoElementRuleHashes[static_cast(CSSPseudoElementType::Count)]; + RuleHash* mPseudoElementRuleHashes[ + static_cast(CSSPseudoElementType::Count)]; nsTArray mStateSelectors; EventStates mSelectorDocumentStates; PLDHashTable mClassSelectors; @@ -2645,9 +2645,8 @@ nsCSSRuleProcessor::RulesMatching(PseudoElementRuleProcessorData* aData) RuleCascadeData* cascade = GetRuleCascade(aData->mPresContext); if (cascade) { - RuleHash* ruleHash = - cascade->mPseudoElementRuleHashes[static_cast( - aData->mPseudoType)]; + RuleHash* ruleHash = cascade->mPseudoElementRuleHashes[ + static_cast(aData->mPseudoType)]; if (ruleHash) { NodeMatchContext nodeContext(EventStates(), nsCSSRuleProcessor::IsLink(aData->mElement)); @@ -3416,8 +3415,8 @@ AddRule(RuleSelectorPair* aRuleInfo, RuleCascadeData* aCascade) if (MOZ_LIKELY(pseudoType == CSSPseudoElementType::NotPseudo)) { cascade->mRuleHash.AppendRule(*aRuleInfo); } else if (pseudoType < CSSPseudoElementType::Count) { - RuleHash*& ruleHash = - cascade->mPseudoElementRuleHashes[static_cast(pseudoType)]; + RuleHash*& ruleHash = cascade->mPseudoElementRuleHashes[ + static_cast(pseudoType)]; if (!ruleHash) { ruleHash = new RuleHash(cascade->mQuirksMode); if (!ruleHash) { diff --git a/layout/style/nsStyleContext.cpp b/layout/style/nsStyleContext.cpp index 32d842cba0..23327d8d2f 100644 --- a/layout/style/nsStyleContext.cpp +++ b/layout/style/nsStyleContext.cpp @@ -90,7 +90,8 @@ nsStyleContext::nsStyleContext(nsStyleContext* aParent, // This check has to be done "backward", because if it were written the // more natural way it wouldn't fail even when it needed to. static_assert((UINT64_MAX >> NS_STYLE_CONTEXT_TYPE_SHIFT) >= - static_cast(CSSPseudoElementType::MAX), + static_cast( + CSSPseudoElementType::MAX), "pseudo element bits no longer fit in a uint64_t"); MOZ_ASSERT(aRuleNode); diff --git a/layout/style/test/test_default_bidi_css.html b/layout/style/test/test_default_bidi_css.html index 3386a075ea..3da5047500 100644 --- a/layout/style/test/test_default_bidi_css.html +++ b/layout/style/test/test_default_bidi_css.html @@ -32,10 +32,10 @@ var tests = [ ['div', {'dir': ''}, 'ltr', '-moz-isolate'], ['span', {}, 'ltr', 'normal'], - ['span', {'dir': 'ltr'}, 'ltr', 'embed'], - ['span', {'dir': 'rtl'}, 'rtl', 'embed'], + ['span', {'dir': 'ltr'}, 'ltr', '-moz-isolate'], + ['span', {'dir': 'rtl'}, 'rtl', '-moz-isolate'], ['span', {'dir': 'auto'}, 'ltr', '-moz-isolate'], - ['span', {'dir': ''}, 'ltr', 'normal'], + ['span', {'dir': ''}, 'ltr', '-moz-isolate'], ['bdi', {}, 'ltr', '-moz-isolate'], ['bdi', {'dir': 'ltr'}, 'ltr', '-moz-isolate'], @@ -56,10 +56,10 @@ var tests = [ ['bdo', {'dir': ''}, 'ltr', 'bidi-override'], ['textarea', {}, 'ltr', 'normal'], - ['textarea', {'dir': 'ltr'}, 'ltr', 'embed'], - ['textarea', {'dir': 'rtl'}, 'rtl', 'embed'], + ['textarea', {'dir': 'ltr'}, 'ltr', '-moz-isolate'], + ['textarea', {'dir': 'rtl'}, 'rtl', '-moz-isolate'], ['textarea', {'dir': 'auto'}, 'ltr', '-moz-plaintext'], - ['textarea', {'dir': ''}, 'ltr', 'normal'], + ['textarea', {'dir': ''}, 'ltr', '-moz-isolate'], ['pre', {}, 'ltr', '-moz-isolate'], ['pre', {'dir': 'ltr'}, 'ltr', '-moz-isolate'], diff --git a/layout/style/ua.css b/layout/style/ua.css index ba1b4d5363..fe88b6d7e0 100644 --- a/layout/style/ua.css +++ b/layout/style/ua.css @@ -276,7 +276,7 @@ } -*|*:-moz-full-screen:not(:root) { +*|*:fullscreen:not(:root) { position: fixed !important; top: 0 !important; left: 0 !important; @@ -296,7 +296,7 @@ /* Selectors here should match the check in * nsViewportFrame.cpp:ShouldInTopLayerForFullscreen() */ -*|*:-moz-full-screen:not(:root):not(:-moz-browser-frame) { +*|*:fullscreen:not(:root):not(:-moz-browser-frame) { -moz-top-layer: top !important; } diff --git a/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp b/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp index e6d5bd07eb..e760151eca 100644 --- a/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp +++ b/media/gmp-clearkey/0.1/ClearKeySessionManager.cpp @@ -50,16 +50,6 @@ ClearKeySessionManager::~ClearKeySessionManager() CK_LOGD("ClearKeySessionManager dtor %p", this); } -static bool -ShouldBeAbleToDecode() -{ -#if !defined(ENABLE_WMF) - return false; -#else - return IsWindowsVistaOrGreater(); -#endif -} - static bool CanDecode() { @@ -75,17 +65,12 @@ ClearKeySessionManager::Init(GMPDecryptorCallback* aCallback) { CK_LOGD("ClearKeySessionManager::Init"); mCallback = aCallback; - if (ShouldBeAbleToDecode()) { - if (!CanDecode()) { - const char* err = "EME plugin can't load system decoder!"; - mCallback->SessionError(nullptr, 0, kGMPAbortError, 0, err, strlen(err)); - } else { - mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AND_DECODE_AUDIO | - GMP_EME_CAP_DECRYPT_AND_DECODE_VIDEO); - } - } else { + if (!CanDecode()) { mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AUDIO | GMP_EME_CAP_DECRYPT_VIDEO); + } else { + mCallback->SetCapabilities(GMP_EME_CAP_DECRYPT_AND_DECODE_AUDIO | + GMP_EME_CAP_DECRYPT_AND_DECODE_VIDEO); } ClearKeyPersistence::EnsureInitialized(); } diff --git a/media/libogg/moz.build b/media/libogg/moz.build index cecda3d824..964e3a0ea5 100644 --- a/media/libogg/moz.build +++ b/media/libogg/moz.build @@ -4,6 +4,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +with Files('*'): + BUG_COMPONENT = ('Core', 'Video/Audio') + EXPORTS.ogg += [ 'include/ogg/config_types.h', 'include/ogg/ogg.h', diff --git a/media/libopus/README_MOZILLA b/media/libopus/README_MOZILLA index 2d85c9f99e..641da75ee2 100644 --- a/media/libopus/README_MOZILLA +++ b/media/libopus/README_MOZILLA @@ -8,4 +8,4 @@ files after the copy step. The upstream repository is https://git.xiph.org/opus.git -The git tag/revision used was v1.1. +The git tag/revision used was v1.1.2. diff --git a/media/libopus/celt/_kiss_fft_guts.h b/media/libopus/celt/_kiss_fft_guts.h index aefe490e11..5e3d58fd66 100644 --- a/media/libopus/celt/_kiss_fft_guts.h +++ b/media/libopus/celt/_kiss_fft_guts.h @@ -65,10 +65,6 @@ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) -# define C_MUL4(m,a,b) \ - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) - # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ (c).i = S_MUL( (c).i , s ) ; }while(0) @@ -101,6 +97,9 @@ #if defined(OPUS_ARM_INLINE_EDSP) #include "arm/kiss_fft_armv5e.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/kiss_fft_mipsr1.h" +#endif #else /* not FIXED_POINT*/ diff --git a/media/libopus/celt/arch.h b/media/libopus/celt/arch.h index 3bbcd3663a..9f74ddd267 100644 --- a/media/libopus/celt/arch.h +++ b/media/libopus/celt/arch.h @@ -69,11 +69,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define IMUL32(a,b) ((a)*(b)) -#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ @@ -108,6 +105,13 @@ typedef opus_val32 celt_ener; #define SCALEIN(a) (a) #define SCALEOUT(a) (a) +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) + +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "fixed_debug.h" #else @@ -137,6 +141,22 @@ typedef float celt_sig; typedef float celt_norm; typedef float celt_ener; +#ifdef FLOAT_APPROX +/* This code should reliably detect NaN/inf even when -ffast-math is used. + Assumes IEEE 754 format. */ +static OPUS_INLINE int celt_isnan(float x) +{ + union {float f; opus_uint32 i;} in; + in.f = x; + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; +} +#else +#ifdef __FAST_MATH__ +#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input +#endif +#define celt_isnan(x) ((x)!=(x)) +#endif + #define Q15ONE 1.0f #define NORM_SCALING 1.f @@ -146,6 +166,10 @@ typedef float celt_ener; #define VERY_LARGE16 1e15f #define Q15_ONE ((opus_val16)1.f) +/* This appears to be the same speed as C99's fabsf() but it's more portable. */ +#define ABS16(x) ((float)fabs(x)) +#define ABS32(x) ((float)fabs(x)) + #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -184,6 +208,7 @@ typedef float celt_ener; #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) +#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b)) @@ -201,6 +226,8 @@ typedef float celt_ener; #define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) +#define SIG2WORD16(x) (x) + #endif /* !FIXED_POINT */ #ifndef GLOBAL_STACK_SIZE diff --git a/media/libopus/celt/arm/arm2gnu.pl b/media/libopus/celt/arm/arm2gnu.pl index eab42efa2b..6c922ac819 100755 --- a/media/libopus/celt/arm/arm2gnu.pl +++ b/media/libopus/celt/arm/arm2gnu.pl @@ -1,7 +1,33 @@ #!/usr/bin/perl +# Copyright (C) 2002-2013 Xiph.org Foundation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. my $bigend; # little/big endian my $nxstack; +my $apple = 0; +my $symprefix = ""; $nxstack = 0; @@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}' while ($ARGV[0] =~ /^-/) { $_ = shift; - last if /^--/; - if (/^-n/) { + last if /^--$/; + if (/^-n$/) { $nflag++; next; } + if (/^--apple$/) { + $apple = 1; + $symprefix = "_"; + next; + } die "I don't recognize this switch: $_\\n"; } $printit++ unless $nflag; @@ -25,6 +56,8 @@ $n=0; $thumb = 0; # ARM mode by default, not Thumb. @proc_stack = (); +printf (" .syntax unified\n"); + LINE: while (<>) { @@ -53,7 +86,7 @@ while (<>) { s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/; s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/; s/\bIMPORT\b/.extern/; - s/\bEXPORT\b/.global/; + s/\bEXPORT\b\s*/.global $symprefix/; s/^(\s+)\[/$1IF/; s/^(\s+)\|/$1ELSE/; s/^(\s+)\]/$1ENDIF/; @@ -109,7 +142,7 @@ while (<>) { # won't match the original source file (we could use the .line # directive, which is documented to be obsolete, but then gdb will # show the wrong line in the translated source file). - s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/; + s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple); } } @@ -131,9 +164,13 @@ while (<>) { $prefix = ""; if ($proc) { - $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc); + $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple); + # Make sure we $prefix isn't empty here (for the $apple case). + # We handle mangling the label here, make sure it doesn't match + # the label handling below (if $prefix would be empty). + $prefix = "; "; push(@proc_stack, $proc); - s/^[A-Za-z_\.]\w+/$&:/; + s/^[A-Za-z_\.]\w+/$symprefix$&:/; } $prefix = $prefix."\t.thumb_func; " if ($thumb); s/\bPROC\b/@ $&/; @@ -146,7 +183,7 @@ while (<>) { my $proc; s/\bENDP\b/@ $&/; $proc = pop(@proc_stack); - $_ = "\t.size $proc, .-$proc".$_ if ($proc); + $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple); } s/\bSUBT\b/@ $&/; s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25 @@ -311,6 +348,6 @@ while (<>) { } #If we had a code section, mark that this object doesn't need an executable # stack. -if ($nxstack) { +if ($nxstack && !$apple) { printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n"); } diff --git a/media/libopus/celt/arm/arm_celt_map.c b/media/libopus/celt/arm/arm_celt_map.c index 547a84d149..ee6c244786 100644 --- a/media/libopus/celt/arm/arm_celt_map.c +++ b/media/libopus/celt/arm/arm_celt_map.c @@ -30,6 +30,8 @@ #endif #include "pitch.h" +#include "kiss_fft.h" +#include "mdct.h" #if defined(OPUS_HAVE_RTCD) @@ -41,9 +43,79 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; -# else -# error "Floating-point implementation is not supported by ARM asm yet." \ - "Reconfigure with --disable-rtcd or send patches." -# endif +# else /* !FIXED_POINT */ +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int) = { + celt_pitch_xcorr_c, /* ARMv4 */ + celt_pitch_xcorr_c, /* EDSP */ + celt_pitch_xcorr_c, /* Media */ + celt_pitch_xcorr_float_neon /* Neon */ +}; +# endif +# endif /* FIXED_POINT */ -#endif +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(HAVE_ARM_NE10) +# if defined(CUSTOM_MODES) +int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_alloc_arch_c, /* ARMv4 */ + opus_fft_alloc_arch_c, /* EDSP */ + opus_fft_alloc_arch_c, /* Media */ + opus_fft_alloc_arm_neon /* Neon with NE10 library support */ +}; + +void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_free_arch_c, /* ARMv4 */ + opus_fft_free_arch_c, /* EDSP */ + opus_fft_free_arch_c, /* Media */ + opus_fft_free_arm_neon /* Neon with NE10 */ +}; +# endif /* CUSTOM_MODES */ + +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) = { + opus_fft_c, /* ARMv4 */ + opus_fft_c, /* EDSP */ + opus_fft_c, /* Media */ + opus_fft_neon /* Neon with NE10 */ +}; + +void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) = { + opus_ifft_c, /* ARMv4 */ + opus_ifft_c, /* EDSP */ + opus_ifft_c, /* Media */ + opus_ifft_neon /* Neon with NE10 */ +}; + +void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, + int stride, int arch) = { + clt_mdct_forward_c, /* ARMv4 */ + clt_mdct_forward_c, /* EDSP */ + clt_mdct_forward_c, /* Media */ + clt_mdct_forward_neon /* Neon with NE10 */ +}; + +void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, + int stride, int arch) = { + clt_mdct_backward_c, /* ARMv4 */ + clt_mdct_backward_c, /* EDSP */ + clt_mdct_backward_c, /* Media */ + clt_mdct_backward_neon /* Neon with NE10 */ +}; + +# endif /* HAVE_ARM_NE10 */ +# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ + +#endif /* OPUS_HAVE_RTCD */ diff --git a/media/libopus/celt/arm/armcpu.c b/media/libopus/celt/arm/armcpu.c index 17685258b1..5e5d10c344 100644 --- a/media/libopus/celt/arm/armcpu.c +++ b/media/libopus/celt/arm/armcpu.c @@ -73,7 +73,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*VORR q0,q0,q0*/ __emit(0xF2200150); @@ -107,7 +107,7 @@ opus_uint32 opus_cpu_capabilities(void) while(fgets(buf, 512, cpuinfo) != NULL) { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for edsp and neon flag */ if(memcmp(buf, "Features", 8) == 0) { @@ -118,7 +118,7 @@ opus_uint32 opus_cpu_capabilities(void) flags |= OPUS_CPU_ARM_EDSP; # endif -# if defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) p = strstr(buf, " neon"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) flags |= OPUS_CPU_ARM_NEON; diff --git a/media/libopus/celt/arm/celt_ne10_fft.c b/media/libopus/celt/arm/celt_ne10_fft.c new file mode 100644 index 0000000000..42d96a7117 --- /dev/null +++ b/media/libopus/celt/arm/celt_ne10_fft.c @@ -0,0 +1,174 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_fft.c + @brief ARM Neon optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#endif + +#include +#include +#include "os_support.h" +#include "kiss_fft.h" +#include "stack_alloc.h" + +#if !defined(FIXED_POINT) +# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon +#else +# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft) +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon +#endif + +#if defined(CUSTOM_MODES) + +/* nfft lengths in NE10 that support scaled fft */ +# define NE10_FFTSCALED_SUPPORT_MAX 4 +static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { + 480, 240, 120, 60 +}; + +int opus_fft_alloc_arm_neon(kiss_fft_state *st) +{ + int i; + size_t memneeded = sizeof(struct arch_fft_state); + + st->arch_fft = (arch_fft_state *)opus_alloc(memneeded); + if (!st->arch_fft) + return -1; + + for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { + if(st->nfft == ne10_fft_scaled_support[i]) + break; + } + if (i == NE10_FFTSCALED_SUPPORT_MAX) { + /* This nfft length (scaled fft) is not supported in NE10 */ + st->arch_fft->is_supported = 0; + st->arch_fft->priv = NULL; + } + else { + st->arch_fft->is_supported = 1; + st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft); + if (st->arch_fft->priv == NULL) { + return -1; + } + } + return 0; +} + +void opus_fft_free_arm_neon(kiss_fft_state *st) +{ + NE10_FFT_CFG_TYPE_T cfg; + + if (!st->arch_fft) + return; + + cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv; + if (cfg) + NE10_FFT_DESTROY_C2C_TYPE(cfg); + opus_free(st->arch_fft); +} +#endif + +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) +{ + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); + SAVE_STACK; + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); + + if (!st->arch_fft->is_supported) { + /* This nfft length (scaled fft) not supported in NE10 */ + opus_fft_c(st, fin, fout); + } + else { + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) + state.is_forward_scaled = 1; + + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0, 1); +#endif + } + RESTORE_STACK; +} + +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) +{ + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); + SAVE_STACK; + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); + + if (!st->arch_fft->is_supported) { + /* This nfft length (scaled fft) not supported in NE10 */ + opus_ifft_c(st, fin, fout); + } + else { + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) + state.is_backward_scaled = 0; + + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1, 0); +#endif + } + RESTORE_STACK; +} diff --git a/media/libopus/celt/arm/celt_ne10_mdct.c b/media/libopus/celt/arm/celt_ne10_mdct.c new file mode 100644 index 0000000000..293c3efd7a --- /dev/null +++ b/media/libopus/celt/arm/celt_ne10_mdct.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_mdct.c + @brief ARM Neon optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#endif + +#include "kiss_fft.h" +#include "_kiss_fft_guts.h" +#include "mdct.h" +#include "stack_alloc.h" + +void clt_mdct_forward_neon(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + + SAVE_STACK; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N4, kiss_fft_cpx); + + /* Consider the input to be composed of four blocks: [a, b, c, d] */ + /* Window, shuffle, fold */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; + for(i=0;i<((overlap+3)>>2);i++) + { + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); + *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + wp1 = window; + wp2 = window+overlap-1; + for(;i>2);i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = *xp2; + *yp++ = *xp1; + xp1+=2; + xp2-=2; + } + for(;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; + yp1 += 2*stride; + yp2 -= 2*stride; + } + } + RESTORE_STACK; +} + +void clt_mdct_backward_neon(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, + int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + const kiss_twiddle_scalar *trig; + const kiss_fft_state *st = l->kfft[shift]; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + + /* Pre-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + for(i=0;i>1)), arch); + + /* Post-rotate and de-shuffle from both ends of the buffer at once to make + it in-place. */ + { + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; + /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the + middle pair will be computed twice. */ + for(i=0;i<(N4+1)>>1;i++) + { + kiss_fft_scalar re, im, yr, yi; + kiss_twiddle_scalar t0, t1; + re = yp0[0]; + im = yp0[1]; + t0 = t[i]; + t1 = t[N4+i]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + re = yp1[0]; + im = yp1[1]; + yp0[0] = yr; + yp1[1] = yi; + + t0 = t[(N4-i-1)]; + t1 = t[(N2-i-1)]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + yp1[0] = yr; + yp0[1] = yi; + yp0 += 2; + yp1 -= 2; + } + } + + /* Mirror on both sides for TDAC */ + { + kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + const opus_val16 * OPUS_RESTRICT wp1 = window; + const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; + + for(i = 0; i < overlap/2; i++) + { + kiss_fft_scalar x1, x2; + x1 = *xp1; + x2 = *yp1; + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); + wp1++; + wp2--; + } + } + RESTORE_STACK; +} diff --git a/media/libopus/celt/arm/celt_neon_intr.c b/media/libopus/celt/arm/celt_neon_intr.c new file mode 100644 index 0000000000..47dce15ba5 --- /dev/null +++ b/media/libopus/celt/arm/celt_neon_intr.c @@ -0,0 +1,252 @@ +/* Copyright (c) 2014-2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_neon_intr.c + @brief ARM Neon Intrinsic optimizations for celt + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include "../pitch.h" + +#if !defined(FIXED_POINT) +/* + * Function: xcorr_kernel_neon_float + * --------------------------------- + * Computes 4 correlation values and stores them in sum[4] + */ +static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y, + float32_t sum[4], int len) { + float32x4_t YY[3]; + float32x4_t YEXT[3]; + float32x4_t XX[2]; + float32x2_t XX_2; + float32x4_t SUMM; + const float32_t *xi = x; + const float32_t *yi = y; + + celt_assert(len>0); + + YY[0] = vld1q_f32(yi); + SUMM = vdupq_n_f32(0); + + /* Consume 8 elements in x vector and 12 elements in y + * vector. However, the 12'th element never really gets + * touched in this loop. So, if len == 8, then we only + * must access y[0] to y[10]. y[11] must not be accessed + * hence make sure len > 8 and not len >= 8 + */ + while (len > 8) { + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + YY[2] = vld1q_f32(yi); + + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + + SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); + YEXT[0] = vextq_f32(YY[0], YY[1], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); + YEXT[1] = vextq_f32(YY[0], YY[1], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); + YEXT[2] = vextq_f32(YY[0], YY[1], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); + + SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0); + YEXT[0] = vextq_f32(YY[1], YY[2], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1); + YEXT[1] = vextq_f32(YY[1], YY[2], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0); + YEXT[2] = vextq_f32(YY[1], YY[2], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1); + + YY[0] = YY[2]; + len -= 8; + } + + /* Consume 4 elements in x vector and 8 elements in y + * vector. However, the 8'th element in y never really gets + * touched in this loop. So, if len == 4, then we only + * must access y[0] to y[6]. y[7] must not be accessed + * hence make sure len>4 and not len>=4 + */ + if (len > 4) { + yi += 4; + YY[1] = vld1q_f32(yi); + + XX[0] = vld1q_f32(xi); + xi += 4; + + SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); + YEXT[0] = vextq_f32(YY[0], YY[1], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); + YEXT[1] = vextq_f32(YY[0], YY[1], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); + YEXT[2] = vextq_f32(YY[0], YY[1], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); + + YY[0] = YY[1]; + len -= 4; + } + + while (--len > 0) { + XX_2 = vld1_dup_f32(xi++); + SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); + YY[0]= vld1q_f32(++yi); + } + + XX_2 = vld1_dup_f32(xi); + SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); + + vst1q_f32(sum, SUMM); +} + +/* + * Function: xcorr_kernel_neon_float_process1 + * --------------------------------- + * Computes single correlation values and stores in *sum + */ +static void xcorr_kernel_neon_float_process1(const float32_t *x, + const float32_t *y, float32_t *sum, int len) { + float32x4_t XX[4]; + float32x4_t YY[4]; + float32x2_t XX_2; + float32x2_t YY_2; + float32x4_t SUMM; + float32x2_t SUMM_2[2]; + const float32_t *xi = x; + const float32_t *yi = y; + + SUMM = vdupq_n_f32(0); + + /* Work on 16 values per iteration */ + while (len >= 16) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + XX[2] = vld1q_f32(xi); + xi += 4; + XX[3] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + YY[2] = vld1q_f32(yi); + yi += 4; + YY[3] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + SUMM = vmlaq_f32(SUMM, YY[2], XX[2]); + SUMM = vmlaq_f32(SUMM, YY[3], XX[3]); + len -= 16; + } + + /* Work on 8 values */ + if (len >= 8) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + len -= 8; + } + + /* Work on 4 values */ + if (len >= 4) { + XX[0] = vld1q_f32(xi); + xi += 4; + YY[0] = vld1q_f32(yi); + yi += 4; + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + len -= 4; + } + + /* Start accumulating results */ + SUMM_2[0] = vget_low_f32(SUMM); + if (len >= 2) { + /* While at it, consume 2 more values if available */ + XX_2 = vld1_f32(xi); + xi += 2; + YY_2 = vld1_f32(yi); + yi += 2; + SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2); + len -= 2; + } + SUMM_2[1] = vget_high_f32(SUMM); + SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]); + SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]); + /* Ok, now we have result accumulated in SUMM_2[0].0 */ + + if (len > 0) { + /* Case when you have one value left */ + XX_2 = vld1_dup_f32(xi); + YY_2 = vld1_dup_f32(yi); + SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2); + } + + vst1_lane_f32(sum, SUMM_2[0], 0); +} + +void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) { + int i; + celt_assert(max_pitch > 0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); + + for (i = 0; i < (max_pitch-3); i += 4) { + xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i, + (float32_t *)xcorr+i, len); + } + + /* In case max_pitch isn't multiple of 4 + * compute single correlation value per iteration + */ + for (; i < max_pitch; i++) { + xcorr_kernel_neon_float_process1((const float32_t *)_x, + (const float32_t *)_y+i, (float32_t *)xcorr+i, len); + } +} +#endif diff --git a/media/libopus/celt/arm/celt_pitch_xcorr_arm.s b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s index 09917b16bf..f96e0a88bb 100644 --- a/media/libopus/celt/arm/celt_pitch_xcorr_arm.s +++ b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s @@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON ; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 xcorr_kernel_neon PROC +xcorr_kernel_neon_start ; input: ; r3 = int len ; r4 = opus_val16 *x @@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4 VEOR q0, q0, q0 ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. ; So we don't save/restore any other registers. - BL xcorr_kernel_neon + BL xcorr_kernel_neon_start SUBS r6, r6, #4 VST1.32 {q0}, [r2]! ; _y += 4 @@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP ; This will get used on ARMv7 devices without NEON, so it has been optimized ; to take advantage of dual-issuing where possible. xcorr_kernel_edsp PROC +xcorr_kernel_edsp_start ; input: ; r3 = int len ; r4 = opus_val16 *_x (must be 32-bit aligned) @@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done SUBS r2, r2, #1 ; j-- ; Stall SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) - LDRGTH r14, [r4], #2 ; r14 = *x++ + LDRHGT r14, [r4], #2 ; r14 = *x++ SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) @@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) LDRH r10, [r5], #2 ; r10 = y_4 = *y++ SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) - LDRGTH r12, [r4], #2 ; r12 = *x++ + LDRHGT r12, [r4], #2 ; r12 = *x++ SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) BLE xcorr_kernel_edsp_done SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) @@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) LDRH r2, [r5], #2 ; r2 = y_5 = *y++ SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRGTH r14, [r4] ; r14 = *x + LDRHGT r14, [r4] ; r14 = *x SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) BLE xcorr_kernel_edsp_done SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) @@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4 celt_pitch_xcorr_edsp_process1u_loop4_done ADDS r12, r12, #4 celt_pitch_xcorr_edsp_process1u_loop1 - LDRGEH r6, [r4], #2 + LDRHGE r6, [r4], #2 ; Stall SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - SUBGES r12, r12, #1 - LDRGTH r8, [r5], #2 + SUBSGE r12, r12, #1 + LDRHGT r8, [r5], #2 BGT celt_pitch_xcorr_edsp_process1u_loop1 ; Restore _x SUB r4, r4, r3, LSL #1 @@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4 MOV r7, #0 MOV r8, #0 MOV r9, #0 - BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) CMP r0, r6 ; _y+=4 @@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1 ADDS r12, r12, #1 ; Stall SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDRGTH r7, [r4], #2 + LDRHGT r7, [r4], #2 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) BLE celt_pitch_xcorr_edsp_process2_done LDRH r9, [r5], #2 @@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done SUBGE r12, r12, #2 SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) ADDS r12, r12, #1 - LDRGEH r6, [r4], #2 - LDRGEH r8, [r5], #2 + LDRHGE r6, [r4], #2 + LDRHGE r8, [r5], #2 ; Stall SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) ; maxcorr = max(maxcorr, sum) diff --git a/media/libopus/celt/arm/fft_arm.h b/media/libopus/celt/arm/fft_arm.h new file mode 100644 index 0000000000..0cb55d8e22 --- /dev/null +++ b/media/libopus/celt/arm/fft_arm.h @@ -0,0 +1,72 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file fft_arm.h + @brief ARM Neon Intrinsic optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#if !defined(FFT_ARM_H) +#define FFT_ARM_H + +#include "config.h" +#include "kiss_fft.h" + +#if defined(HAVE_ARM_NE10) + +int opus_fft_alloc_arm_neon(kiss_fft_state *st); +void opus_fft_free_arm_neon(kiss_fft_state *st); + +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); + +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); + +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_FFT (1) + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arm_neon(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arm_neon(_st)) + +#define opus_fft(_st, _fin, _fout, arch) \ + ((void)(arch), opus_fft_neon(_st, _fin, _fout)) + +#define opus_ifft(_st, _fin, _fout, arch) \ + ((void)(arch), opus_ifft_neon(_st, _fin, _fout)) + +#endif /* OPUS_HAVE_RTCD */ + +#endif /* HAVE_ARM_NE10 */ + +#endif diff --git a/media/libopus/celt/arm/fixed_armv4.h b/media/libopus/celt/arm/fixed_armv4.h index b690bc8cea..efb3b1896a 100644 --- a/media/libopus/celt/arm/fixed_armv4.h +++ b/media/libopus/celt/arm/fixed_armv4.h @@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) #undef MAC16_32_Q15 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #undef MULT32_32_Q31 diff --git a/media/libopus/celt/arm/fixed_armv5e.h b/media/libopus/celt/arm/fixed_armv5e.h index 1194a7d3ec..36a6321101 100644 --- a/media/libopus/celt/arm/fixed_armv5e.h +++ b/media/libopus/celt/arm/fixed_armv5e.h @@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q16\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) + /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, @@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) } #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) +#ifdef OPUS_ARM_INLINE_MEDIA + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) +{ + celt_sig res; + __asm__( + "#SIG2WORD16\n\t" + "ssat %0, #16, %1, ASR #12\n\t" + : "=r"(res) + : "r"(x+2048) + ); + return EXTRACT16(res); +} +#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) + +#endif /* OPUS_ARM_INLINE_MEDIA */ + #endif diff --git a/media/libopus/celt/arm/mdct_arm.h b/media/libopus/celt/arm/mdct_arm.h new file mode 100644 index 0000000000..49cbb44576 --- /dev/null +++ b/media/libopus/celt/arm/mdct_arm.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file arm_mdct.h + @brief ARM Neon Intrinsic optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(MDCT_ARM_H) +#define MDCT_ARM_H + +#include "config.h" +#include "mdct.h" + +#if defined(HAVE_ARM_NE10) +/** Compute a forward MDCT and scale by 4/N, trashes the input array */ +void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); + +void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); + +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_MDCT (1) +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ + clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) +#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ + clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) +#endif /* OPUS_HAVE_RTCD */ +#endif /* HAVE_ARM_NE10 */ + +#endif diff --git a/media/libopus/celt/arm/pitch_arm.h b/media/libopus/celt/arm/pitch_arm.h index a07f8ac2fa..8626ed75b9 100644 --- a/media/libopus/celt/arm/pitch_arm.h +++ b/media/libopus/celt/arm/pitch_arm.h @@ -52,6 +52,17 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) # endif -# endif - +#else /* Start !FIXED_POINT */ +/* Float case */ +#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR) +#define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) +#endif +#endif + +#endif /* end !FIXED_POINT */ #endif diff --git a/media/libopus/celt/bands.c b/media/libopus/celt/bands.c index cce56e2f6e..25f229e267 100644 --- a/media/libopus/celt/bands.c +++ b/media/libopus/celt/bands.c @@ -92,11 +92,11 @@ static int bitexact_log2tan(int isin,int icos) #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize< 0) { - int shift = celt_ilog2(maxval)-10; - j=M*eBands[i]; do { - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), - EXTRACT16(VSHR32(X[j+c*N],shift))); - } while (++jlogN[i]>>BITRES)+LM+1)>>1); + j=eBands[i]<0) + { + do { + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), + EXTRACT16(SHR32(X[j+c*N],shift))); + } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { @@ -150,18 +155,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #else /* FIXED_POINT */ /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -190,74 +193,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel /* De-normalise the energy to produce the synthesis from the unit-energy bands */ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, + int end, int M, int downsample, int silence) { - int i, c, N; + int i, N; + int bound; + celt_sig * OPUS_RESTRICT f; + const celt_norm * OPUS_RESTRICT x; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); - c=0; do { - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - f = freq+c*N; - x = X+c*N+M*eBands[start]; - for (i=0;inbEBands], SHL16((opus_val16)eMeans[i],6)); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef FIXED_POINT - g = celt_exp2(lg); + g = celt_exp2(lg); #else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<=0); + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef FIXED_POINT thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); @@ -345,12 +355,12 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } /* We just added some energy, so we need to renormalise */ if (renormalize) - renormalise_vector(X, N0<m->nbEBands-4) - hf_sum += 32*(tcount[1]+tcount[0])/N; + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); sum += tmp*256; nbBands++; @@ -493,7 +502,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, if (update_hf) { if (hf_sum) - hf_sum /= C*(4-m->nbEBands+end); + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); *hf_average = (*hf_average+hf_sum)>>1; hf_sum = *hf_average; if (*tapset_decision==2) @@ -509,7 +518,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ - sum /= nbBands; + celt_assert(sum>=0); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -567,8 +577,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard for (j=0;jarch); } tell = ec_tell_frac(ec); if (qn!=1) @@ -769,7 +779,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, ec_dec_update(ec, fl, fl+fs, ft); } } - itheta = (opus_int32)itheta*16384/qn; + celt_assert(itheta>=0); + itheta = celt_udiv((opus_int32)itheta*16384, qn); if (encode && stereo) { if (itheta==0) @@ -1021,8 +1032,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill &= cm_mask; if (!fill) { - for (j=0;jarch); } } } @@ -1084,7 +1094,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, longBlocks = B0==1; - N_B /= B; + N_B = celt_udiv(N_B, B); /* Special case for one sample */ if (N==1) @@ -1098,9 +1108,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) { - int j; - for (j=0;jarch); if (inv) { int j; @@ -1353,9 +1361,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm void quant_all_bands(int encode, const CELTMode *m, int start, int end, - celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, - int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, - opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) + celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, + const celt_ener *bandE, int *pulses, int shortBlocks, int spread, + int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, + opus_int32 balance, ec_ctx *ec, int LM, int codedBands, + opus_uint32 *seed, int arch) { int i; opus_int32 remaining_bits; @@ -1397,6 +1407,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ctx.m = m; ctx.seed = *seed; ctx.spread = spread; + ctx.arch = arch; for (i=start;ioverlap; + nbEBands = mode->nbEBands; + N = mode->shortMdctSize<shortMdctSize; + B = M; + NB = mode->shortMdctSize; shift = mode->maxLM; } else { B = 1; - N = mode->shortMdctSize<shortMdctSize<maxLM-LM; } - c=0; do { - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + + if (CC==2&&C==1) + { + /* Copying a mono streams to two channels */ + celt_sig *freq2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ + freq2 = out_syn[1]+overlap/2; + OPUS_COPY(freq2, freq, N); for (b=0;bmdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); - } while (++cmdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); + for (b=0;bmdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch); + } else if (CC==1&&C==2) + { + /* Downmixing a stereo stream to mono */ + celt_sig *freq2; + freq2 = out_syn[0]+overlap/2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Use the output buffer as temp array before downmixing. */ + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, + downsample, silence); + for (i=0;imdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); + } else { + /* Normal case (mono or stereo) */ + c=0; do { + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, + downsample, silence); + for (b=0;bmdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch); + } while (++c>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + RESTORE_STACK; + return pitch_index; +} + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) { int c; int i; @@ -343,11 +425,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R int nbEBands; int overlap; int start; - int downsample; int loss_count; int noise_based; const opus_int16 *eBands; - VARDECL(celt_sig, scratch); SAVE_STACK; mode = st->mode; @@ -367,40 +447,37 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R loss_count = st->loss_count; start = st->start; - downsample = st->downsample; noise_based = loss_count >= 5 || start != 0; - ALLOC(scratch, noise_based?N*C:N, celt_sig); if (noise_based) { /* Noise-based PLC/CNG */ - celt_sig *freq; +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif opus_uint32 seed; - opus_val16 *plcLogE; int end; int effEnd; - + opus_val16 decay; end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); - /* Share the interleaved signal MDCT coefficient buffer with the - deemphasis scratch buffer. */ - freq = scratch; +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[C-1]+overlap/2); +#else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif - if (loss_count >= 5) - plcLogE = backgroundLogE; - else { - /* Energy decay */ - opus_val16 decay = loss_count==0 ? - QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); - c=0; do - { - for (i=start;irng; for (c=0;c>20); } - renormalise_vector(X+boffs, blen, Q15ONE); + renormalise_vector(X+boffs, blen, Q15ONE, st->arch); } } st->rng = seed; - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<>1)); } while (++cdownsample, 0, st->arch); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -445,15 +514,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R if (loss_count == 0) { - VARDECL( opus_val16, lp_pitch_buf ); - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, st->arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - st->last_pitch_index = pitch_index; + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); @@ -516,7 +577,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } /* Compute the excitation for exc_length samples before the loss. */ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, - exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem); + exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch); } /* Check if the waveform is decaying, and if so how fast. @@ -583,7 +644,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R the signal domain. */ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, - lpc_mem); + lpc_mem, st->arch); } /* Check if the synthesis energy is higher than expected, which can @@ -631,7 +692,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R comb_filter(etmp, buf+DECODE_BUFFER_SIZE, st->postfilter_period, st->postfilter_period, overlap, -st->postfilter_gain, -st->postfilter_gain, - st->postfilter_tapset, st->postfilter_tapset, NULL, 0); + st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch); /* Simulate TDAC on the concealed audio so that it blends with the MDCT of the next frame. */ @@ -644,22 +705,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++cpreemph, st->preemph_memD, scratch); - st->loss_count = loss_count+1; RESTORE_STACK; } -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; opus_int32 bits; ec_dec _dec; - VARDECL(celt_sig, freq); +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); @@ -677,6 +739,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat int intra_ener; const int CC = st->channels; int LM, M; + int start; + int end; int effEnd; int codedBands; int alloc_trim; @@ -703,11 +767,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; frame_size *= st->downsample; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - } while (++c_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; @@ -725,7 +788,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data0<0) return OPUS_INVALID_PACKET; } - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); LM = (data0>>3)&0x3; C = 1 + ((data0>>2)&0x1); data++; @@ -752,14 +815,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return OPUS_BAD_ARG; N = M*mode->shortMdctSize; + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++cend; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, N, LM); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -795,7 +863,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat postfilter_gain = 0; postfilter_pitch = 0; postfilter_tapset = 0; - if (st->start==0 && tell+16 <= total_bits) + if (start==0 && tell+16 <= total_bits) { if(ec_dec_bit_logp(dec, 1)) { @@ -826,11 +894,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat /* Decode the global flags (first symbols in the stream) */ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; /* Get band energies */ - unquant_coarse_energy(mode, st->start, st->end, oldBandE, + unquant_coarse_energy(mode, start, end, oldBandE, intra_ener, dec, C, LM); ALLOC(tf_res, nbEBands, int); - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + tf_decode(start, end, isTransient, tf_res, LM, dec); tell = ec_tell(dec); spread_decision = SPREAD_NORMAL; @@ -846,7 +914,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); - for (i=st->start;iend;i++) + for (i=start;istart, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); + + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++cstart, st->end, X, C==2 ? X+N : NULL, collapse_masks, +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[CC-1]+overlap/2); +#else + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif + + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, - len*(8<rng); + len*(8<rng, st->arch); if (anti_collapse_rsv > 0) { anti_collapse_on = ec_dec_bits(dec, 1); } - unquant_energy_finalise(mode, st->start, st->end, oldBandE, + unquant_energy_finalise(mode, start, end, oldBandE, fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); if (anti_collapse_on) anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - - ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch); if (silence) { for (i=0;istart, effEnd, C, M); - } - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++cdownsample!=1) - bound = IMIN(bound, N/st->downsample); - for (i=bound;idownsample, silence, st->arch); c=0; do { st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, - mode->window, overlap); + mode->window, overlap, st->arch); if (LM!=0) comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, - mode->window, overlap); + mode->window, overlap, st->arch); } while (++cpostfilter_period_old = st->postfilter_period; @@ -978,32 +1024,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->postfilter_tapset_old = st->postfilter_tapset; } - if (C==1) { - for (i=0;iloss_count < 10) + max_background_increase = M*QCONST16(0.001f,DB_SHIFT); + else + max_background_increase = QCONST16(1.f,DB_SHIFT); for (i=0;i<2*nbEBands;i++) - oldLogE2[i] = oldLogE[i]; - for (i=0;i<2*nbEBands;i++) - oldLogE[i] = oldBandE[i]; - for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); + backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); } else { for (i=0;i<2*nbEBands;i++) oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); } c=0; do { - for (i=0;istart;i++) + for (i=0;iend;irng = dec->rng; - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1028,7 +1077,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1045,7 +1094,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j0) for (j=0;jmode = mode; - st->overlap = mode->overlap; st->stream_channels = st->channels = channels; st->upsample = 1; @@ -276,8 +274,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int } /*printf("\n");*/ /* First few samples are bad because we don't propagate the memory */ - for (i=0;i<12;i++) - tmp[i] = 0; + OPUS_CLEAR(tmp, 12); #ifdef FIXED_POINT /* Normalize tmp to max range */ @@ -346,9 +343,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int { int id; #ifdef FIXED_POINT - id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */ + id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ #else - id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */ + id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ #endif unmask += inv_table[id]; } @@ -366,7 +363,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Arbitrary metric for VBR boost */ tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ - *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); + *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); /*printf("%d %f\n", tf_max, mask_metric);*/ RESTORE_STACK; #ifdef FUZZING @@ -378,8 +375,8 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Looks for sudden increases of energy to decide whether we need to patch the transient decision */ -int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int end, int C) +static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, + int start, int end, int C) { int i, c; opus_val32 mean_diff=0; @@ -388,28 +385,28 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, avoid false detection caused by irrelevant bands */ if (C==1) { - spread_old[0] = oldE[0]; - for (i=1;i=0;i--) + for (i=end-2;i>=start;i--) spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); /* Compute mean increase */ c=0; do { - for (i=2;i QCONST16(1.f, DB_SHIFT); } @@ -417,9 +414,10 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, /** Apply window and compute the MDCT for all sub-frames and all channels in a frame */ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample) + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, + int arch) { - const int overlap = OVERLAP(mode); + const int overlap = mode->overlap; int N; int B; int shift; @@ -438,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS for (b=0;bmdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B); + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, + &out[b+c*N*B], mode->window, overlap, shift, B, + arch); } } while (++ceBands[i+1]-m->eBands[i])<eBands[i+1]-m->eBands[i])==1; - for (j=0;jeBands[i]<eBands[i]<>LM, 1<eBands[i]<eBands[i+1]<eBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])<eBands[i]<eBands[i+1]<eBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])< QCONST16(.995f,10)) - trim_index-=4; - else if (sum > QCONST16(.92f,10)) - trim_index-=3; - else if (sum > QCONST16(.85f,10)) - trim_index-=2; - else if (sum > QCONST16(.8f,10)) - trim_index-=1; /* mid-side savings estimations based on the LF average*/ logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); /* mid-side savings estimations based on min correlation */ @@ -819,14 +809,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, } while (++c QCONST16(2.f, DB_SHIFT)) - trim_index--; - if (diff > QCONST16(8.f, DB_SHIFT)) - trim_index--; - if (diff < -QCONST16(4.f, DB_SHIFT)) - trim_index++; - if (diff < -QCONST16(10.f, DB_SHIFT)) - trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); @@ -836,6 +818,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); } +#else + (void)analysis; #endif #ifdef FIXED_POINT @@ -843,10 +827,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, #else trim_index = (int)floor(.5f+trim); #endif - if (trim_index<0) - trim_index = 0; - if (trim_index>10) - trim_index = 10; + trim_index = IMAX(0, IMIN(10, trim_index)); /*printf("%d\n", trim_index);*/ #ifdef FUZZING trim_index = rand()%11; @@ -886,6 +867,66 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); } +#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) +static opus_val16 median_of_5(const opus_val16 *x) +{ + opus_val16 t0, t1, t2, t3, t4; + t2 = x[2]; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + if (x[3] > x[4]) + { + t3 = x[4]; + t4 = x[3]; + } else { + t3 = x[3]; + t4 = x[4]; + } + if (t0 > t3) + { + MSWAP(t0, t3); + MSWAP(t1, t4); + } + if (t2 > t1) + { + if (t1 < t3) + return MIN16(t2, t3); + else + return MIN16(t4, t1); + } else { + if (t2 < t3) + return MIN16(t1, t3); + else + return MIN16(t2, t4); + } +} + +static opus_val16 median_of_3(const opus_val16 *x) +{ + opus_val16 t0, t1, t2; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + t2 = x[2]; + if (t1 < t2) + return t1; + else if (t0 < t2) + return t2; + else + return t0; +} + static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, @@ -899,8 +940,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 SAVE_STACK; ALLOC(follower, C*nbEBands, opus_val16); ALLOC(noise_floor, C*nbEBands, opus_val16); - for (i=0;i bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) last=i; - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); + f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); } for (i=last-1;i>=0;i--) - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + + /* Combine with a median filter to avoid dynalloc triggering unnecessarily. + The "offset" value controls how conservative we are -- a higher offset + reduces the impact of the median filter and makes dynalloc use more bits. */ + offset = QCONST16(1.f, DB_SHIFT); + for (i=2;imode; + overlap = mode->overlap; ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); pre[0] = _pre; @@ -1027,7 +1087,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, c=0; do { OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); - OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); + OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N); } while (++cprefilter_period, st->prefilter_gain); + N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); if (pitch_index > COMBFILTER_MAXPERIOD-2) pitch_index = COMBFILTER_MAXPERIOD-2; gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); @@ -1100,18 +1160,18 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, /*printf("%d %f\n", pitch_index, gain1);*/ c=0; do { - int offset = mode->shortMdctSize-st->overlap; + int offset = mode->shortMdctSize-overlap; st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap); + OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); if (offset) - comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD, + comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD, st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, - st->prefilter_tapset, st->prefilter_tapset, NULL, 0); + st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); - comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, + comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, - st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap); - OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap); + st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); + OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap); if (N>COMBFILTER_MAXPERIOD) { @@ -1196,6 +1256,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 /*printf("%f %f ", analysis->tonality, tonal);*/ target = tonal_target; } +#else + (void)analysis; + (void)pitch_change; #endif if (has_surround_mask&&!lfe) @@ -1273,6 +1336,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int LM, M; int tf_select; int nbFilledBytes, nbAvailableBytes; + int start; + int end; int effEnd; int codedBands; int tf_sum; @@ -1316,6 +1381,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; tf_estimate = 0; if (nbCompressedBytes<2 || pcm==NULL) { @@ -1335,8 +1402,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, M=1<shortMdctSize; - prefilter_mem = st->in_mem+CC*(st->overlap); - oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD)); + prefilter_mem = st->in_mem+CC*(overlap); + oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + CC*nbEBands; oldLogE2 = oldLogE + CC*nbEBands; @@ -1352,8 +1419,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #ifdef CUSTOM_MODES if (st->signalling && enc==NULL) { - int tmp = (mode->effEBands-st->end)>>1; - st->end = IMAX(1, mode->effEBands-tmp); + int tmp = (mode->effEBands-end)>>1; + end = st->end = IMAX(1, mode->effEBands-tmp); compressed[0] = tmp<<5; compressed[0] |= LM<<3; compressed[0] |= (C==2)<<2; @@ -1436,11 +1503,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } total_bits = nbCompressedBytes*8; - effEnd = st->end; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; - ALLOC(in, CC*(N+st->overlap), celt_sig); + ALLOC(in, CC*(N+overlap), celt_sig); sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); @@ -1474,8 +1541,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, enc->nbits_total+=tell-ec_tell(enc); } c=0; do { - celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, st->clip); + int need_clip=0; +#ifndef FIXED_POINT + need_clip = st->clip && sample_max>65536.f; +#endif + celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, + mode->preemph, st->preemph_memE+c, need_clip); } while (++clfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); prefilter_tapset = st->tapset_decision; @@ -1494,7 +1565,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, pitch_change = 1; if (pf_on==0) { - if(st->start==0 && tell+16<=total_bits) + if(start==0 && tell+16<=total_bits) ec_enc_bit_logp(enc, 0, 1); } else { /*This block is not gated by a total bits check only because @@ -1515,7 +1586,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, shortBlocks = 0; if (st->complexity >= 1 && !st->lfe) { - isTransient = transient_analysis(in, N+st->overlap, CC, + isTransient = transient_analysis(in, N+overlap, CC, &tf_estimate, &tf_chan); } if (LM>0 && ec_tell(enc)+3<=total_bits) @@ -1535,33 +1606,32 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(bandLogE2, C*nbEBands, opus_val16); if (secondMdct) { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C); + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); for (i=0;iupsample); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); if (CC==2&&C==1) tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, M); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); if (st->lfe) { - for (i=2;iend;i++) + for (i=2;iend, bandE, bandLogE, C); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); ALLOC(surround_dynalloc, C*nbEBands, opus_val16); - for(i=0;iend;i++) - surround_dynalloc[i] = 0; + OPUS_CLEAR(surround_dynalloc, end); /* This computes how much masking takes place between surround channels */ - if (st->start==0&&st->energy_mask&&!st->lfe) + if (start==0&&st->energy_mask&&!st->lfe) { int mask_end; int midband; @@ -1584,6 +1654,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, diff += MULT16_16(mask, 1+2*i-mask_end); } } + celt_assert(count>0); mask_avg = DIV32_16(mask_avg,count); mask_avg += QCONST16(.2f, DB_SHIFT); diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); @@ -1621,8 +1692,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, disabling masking. */ mask_avg = 0; diff = 0; - for(i=0;istart;iend;i++) + for(i=start;iend-st->start); + frame_avg /= (end-start); temporal_vbr = SUB16(frame_avg,st->spec_avg); temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); @@ -1658,21 +1728,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (!secondMdct) { - for (i=0;i0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C)) + if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) { isTransient = 1; shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ for (i=0;i=15*C && st->start==0 && st->complexity>=2 && !st->lfe) + if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) { int lambda; if (effectiveBytes<40) @@ -1703,22 +1772,22 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, lambda = 3; lambda*=2; tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); - for (i=effEnd;iend;i++) + for (i=effEnd;iend;i++) + for (i=0;istart, st->end, effEnd, bandLogE, + quant_coarse_energy(mode, start, end, effEnd, bandLogE, oldBandE, total_bits, error, enc, C, LM, nbAvailableBytes, st->force_intra, &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); - tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); + tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); if (ec_tell(enc)+4<=total_bits) { @@ -1726,7 +1795,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->tapset_decision = 0; st->spread_decision = SPREAD_NORMAL; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) { if (st->complexity == 0) st->spread_decision = SPREAD_NONE; @@ -1760,7 +1829,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(offsets, nbEBands, int); - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, + maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ @@ -1773,7 +1842,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, total_bits<<=BITRES; total_boost = 0; tell = ec_tell_frac(enc); - for (i=st->start;iend;i++) + for (i=start;iintensity = hysteresis_decision((opus_val16)(equiv_rate/1000), intensity_thresholds, intensity_histeresis, 21, st->intensity); - st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); + st->intensity = IMIN(end,IMAX(start, st->intensity)); } alloc_trim = 5; @@ -1828,7 +1897,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); + end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, + st->intensity, surround_trim, st->arch); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1930,7 +2000,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, bits = (((opus_int32)nbCompressedBytes*8)<=2&&bits>=((LM+2)<end-1; + signalBandwidth = end-1; #ifndef DISABLE_FLOAT_API if (st->analysis.valid) { @@ -1950,7 +2020,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (st->lfe) signalBandwidth = 1; - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); if (st->lastCodedBands) @@ -1958,13 +2028,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, else st->lastCodedBands = codedBands; - quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); + quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); /* Residual quantisation */ ALLOC(collapse_masks, C*nbEBands, unsigned char); - quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, - nbCompressedBytes*(8<rng); + quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, + bandE, pulses, shortBlocks, st->spread_decision, + dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<rng, st->arch); if (anti_collapse_rsv > 0) { @@ -1974,7 +2045,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif ec_enc_bits(enc, anti_collapse_on, 1); } - quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); + quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); if (silence) { @@ -1990,40 +2061,26 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (anti_collapse_on) { anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - } - - if (silence) - { - for (i=0;istart, effEnd, C, M); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); } c=0; do { OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); } while (++csyn_mem[c]+2*MAX_PERIOD-N; } while (++cupsample, silence, st->arch); c=0; do { st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, - mode->window, st->overlap); + mode->window, overlap); if (LM!=0) comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, @@ -2031,7 +2088,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } while (++cupsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD); st->prefilter_period_old = st->prefilter_period; st->prefilter_gain_old = st->prefilter_gain; st->prefilter_tapset_old = st->prefilter_tapset; @@ -2051,16 +2108,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (CC==2&&C==1) { - for (i=0;istart;i++) + for (i=0;iend;iin_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD)); + oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + st->channels*st->mode->nbEBands; oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; OPUS_CLEAR((char*)&st->ENCODER_RESET_START, diff --git a/media/libopus/celt/celt_lpc.c b/media/libopus/celt/celt_lpc.c index fa29d626ea..f02145af0d 100644 --- a/media/libopus/celt/celt_lpc.c +++ b/media/libopus/celt/celt_lpc.c @@ -88,12 +88,15 @@ int p #endif } -void celt_fir(const opus_val16 *_x, + +void celt_fir_c( + const opus_val16 *_x, const opus_val16 *num, opus_val16 *_y, int N, int ord, - opus_val16 *mem) + opus_val16 *mem, + int arch) { int i,j; VARDECL(opus_val16, rnum); @@ -111,6 +114,7 @@ void celt_fir(const opus_val16 *_x, for(i=0;i non-sse + * arch[1] -> sse + * arch[2] -> sse2 + * arch[3] -> sse4.1 + * arch[4] -> avx + */ +#define OPUS_ARCHMASK 7 +int opus_select_arch(void); + #else #define OPUS_ARCHMASK 0 @@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void) return 0; } #endif - #endif diff --git a/media/libopus/celt/cwrs.c b/media/libopus/celt/cwrs.c index ad980cc7d8..2fa9f89cd6 100644 --- a/media/libopus/celt/cwrs.c +++ b/media/libopus/celt/cwrs.c @@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); } -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ opus_uint32 p; int s; int k0; + opus_int16 val; + opus_val32 yy=0; celt_assert(_k>0); celt_assert(_n>1); while(_n>2){ @@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ } else for(p=row[_k];p>_i;p=row[_k])_k--; _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } /*Lots of dimensions case:*/ else{ @@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } } _n--; @@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; _k=(_i+1)>>1; if(_k)_i-=2*_k-1; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); /*_n==1*/ s=-(int)_i; - *_y=(_k+s)^s; + val=(_k+s)^s; + *_y=val; + yy=MAC16_16(yy,val,val); + return yy; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ @@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ _y: Returns the vector of pulses. _u: Must contain entries [0..._k+1] of row _n of U() on input. Its contents will be destructively modified.*/ -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ int j; + opus_int16 val; + opus_val32 yy=0; celt_assert(_n>0); j=0; do{ @@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ while(p>_i)p=_u[--_k]; _i-=p; yj-=_k; - _y[j]=(yj+s)^s; + val=(yj+s)^s; + _y[j]=val; + yy=MAC16_16(yy,val,val); uprev(_u,_k+2,0); } while(++j<_n); + return yy; } /*Returns the index of the given combination of K elements chosen from a set @@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ VARDECL(opus_uint32,u); + int ret; SAVE_STACK; celt_assert(_k>0); ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); RESTORE_STACK; + return ret; } #endif /* SMALL_FOOTPRINT */ diff --git a/media/libopus/celt/cwrs.h b/media/libopus/celt/cwrs.h index 7dfbd076d1..7cd4717459 100644 --- a/media/libopus/celt/cwrs.h +++ b/media/libopus/celt/cwrs.h @@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); void encode_pulses(const int *_y, int N, int K, ec_enc *enc); -void decode_pulses(int *_y, int N, int K, ec_dec *dec); +opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); #endif /* CWRS_H */ diff --git a/media/libopus/celt/entcode.c b/media/libopus/celt/entcode.c index fa5d7c7c2c..70f32016ec 100644 --- a/media/libopus/celt/entcode.c +++ b/media/libopus/celt/entcode.c @@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){ } #endif +#if 1 +/* This is a faster version of ec_tell_frac() that takes advantage + of the low (1/8 bit) resolution to use just a linear function + followed by a lookup to determine the exact transition thresholds. */ +opus_uint32 ec_tell_frac(ec_ctx *_this){ + static const unsigned correction[8] = + {35733, 38967, 42495, 46340, + 50535, 55109, 60097, 65535}; + opus_uint32 nbits; + opus_uint32 r; + int l; + unsigned b; + nbits=_this->nbits_total<rng); + r=_this->rng>>(l-16); + b = (r>>12)-8; + b += r>correction[b]; + l = (l<<3)+b; + return nbits-l; +} +#else opus_uint32 ec_tell_frac(ec_ctx *_this){ opus_uint32 nbits; opus_uint32 r; @@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){ } return nbits-l; } +#endif + +#ifdef USE_SMALL_DIV_TABLE +/* Result of 2^32/(2*i+1), except for i=0. */ +const opus_uint32 SMALL_DIV_TABLE[129] = { + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 +}; +#endif diff --git a/media/libopus/celt/entcode.h b/media/libopus/celt/entcode.h index dd13e49e50..13d6c84ef0 100644 --- a/media/libopus/celt/entcode.h +++ b/media/libopus/celt/entcode.h @@ -34,6 +34,12 @@ # include # include "ecintrin.h" +extern const opus_uint32 SMALL_DIV_TABLE[129]; + +#ifdef OPUS_ARM_ASM +#define USE_SMALL_DIV_TABLE +#endif + /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a larger type, you can speed up the decoder by using it here.*/ typedef opus_uint32 ec_window; @@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){ rounding error is in the positive direction).*/ opus_uint32 ec_tell_frac(ec_ctx *_this); +/* Tested exhaustively for all n and for 1<=d<=256 */ +static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (d>256) + return n/d; + else { + opus_uint32 t, q; + t = EC_ILOG(d&-d); + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; + return q+(n-q*d >= d); + } +#else + return n/d; +#endif +} + +static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (n<0) + return -(opus_int32)celt_udiv(-n, d); + else + return celt_udiv(n, d); +#else + return n/d; +#endif +} + #endif diff --git a/media/libopus/celt/entdec.c b/media/libopus/celt/entdec.c index 3c264685c2..0b3433ed8b 100644 --- a/media/libopus/celt/entdec.c +++ b/media/libopus/celt/entdec.c @@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned s; - _this->ext=_this->rng/_ft; + _this->ext=celt_udiv(_this->rng,_ft); s=(unsigned)(_this->val/_this->ext); return _ft-EC_MINI(s+1,_ft); } diff --git a/media/libopus/celt/entenc.c b/media/libopus/celt/entenc.c index a7e34ecef9..f1750d25b8 100644 --- a/media/libopus/celt/entenc.c +++ b/media/libopus/celt/entenc.c @@ -98,7 +98,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){ else _this->ext++; } -static void ec_enc_normalize(ec_enc *_this){ +static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){ /*If the range is too small, output some bits and rescale it.*/ while(_this->rng<=EC_CODE_BOT){ ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); @@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ opus_uint32 r; - r=_this->rng/_ft; + r=celt_udiv(_this->rng,_ft); if(_fl>0){ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); _this->rng=IMUL32(r,(_fh-_fl)); diff --git a/media/libopus/celt/fixed_debug.h b/media/libopus/celt/fixed_debug.h index 80bc94910f..d28227f5dc 100644 --- a/media/libopus/celt/fixed_debug.h +++ b/media/libopus/celt/fixed_debug.h @@ -496,6 +496,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) +#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b)))) static OPUS_INLINE int SATURATE(int a, int b) { @@ -767,6 +768,16 @@ static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) return res; } +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + + #undef PRINT_MIPS #define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); diff --git a/media/libopus/celt/fixed_generic.h b/media/libopus/celt/fixed_generic.h index ecf018a244..ac67d37ce8 100644 --- a/media/libopus/celt/fixed_generic.h +++ b/media/libopus/celt/fixed_generic.h @@ -113,7 +113,11 @@ /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) +#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. + Results fits in 32 bits */ +#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) @@ -131,4 +135,17 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) +#if defined(MIPSr1_ASM) +#include "mips/fixed_generic_mipsr1.h" +#endif + +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + #endif diff --git a/media/libopus/celt/float_cast.h b/media/libopus/celt/float_cast.h index ede6574860..ed5a39b543 100644 --- a/media/libopus/celt/float_cast.h +++ b/media/libopus/celt/float_cast.h @@ -90,14 +90,14 @@ #include #define float2int(x) lrint(x) -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64)) +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) #include __inline long int float2int(float value) { return _mm_cvtss_si32(_mm_load_ss(&value)); } -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32)) +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) #include /* Win32 doesn't seem to have these functions. diff --git a/media/libopus/celt/kiss_fft.c b/media/libopus/celt/kiss_fft.c index ad706c7397..4ed37d2bb7 100644 --- a/media/libopus/celt/kiss_fft.c +++ b/media/libopus/celt/kiss_fft.c @@ -47,64 +47,56 @@ static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jr = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jtwiddles; - for (j=0;jr = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; + } + } else { + int j; + kiss_fft_cpx scratch[6]; + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + const int m2=2*m; + const int m3=3*m; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;itwiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;jtwiddles; - for (j=0;jtwiddles[fstride*m]; +#endif for (i=0;itwiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -255,56 +230,8 @@ static void kf_bfly3( } } -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;itwiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - }while(--k); - } -} +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -317,13 +244,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;itwiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -488,6 +355,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;infft=nfft; -#ifndef FIXED_POINT +#ifdef FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -581,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co if (st->bitrev==NULL) goto fail; compute_bitrev_table(0, bitrev, 1,1, st->factors,st); + + /* Initialize architecture specific fft parameters */ + if (opus_fft_alloc_arch(st, arch)) + goto fail; } return st; fail: - opus_fft_free(st); + opus_fft_free(st, arch); return NULL; } -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) { - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); } -void opus_fft_free(const kiss_fft_state *cfg) +void opus_fft_free_arch_c(kiss_fft_state *st) { + (void)st; +} + +void opus_fft_free(const kiss_fft_state *cfg, int arch) { if (cfg) { + opus_fft_free_arch((kiss_fft_state *)cfg, arch); opus_free((opus_int16*)cfg->bitrev); if (cfg->shift < 0) opus_free((kiss_twiddle_cpx*)cfg->twiddles); @@ -606,7 +518,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -618,17 +530,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -647,7 +548,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;infft;i++) + { + kiss_fft_cpx x = fin[i]; + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); + } + opus_fft_impl(st, fout); +} + + +void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;infft;i++) fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) - { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<nfft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;infft;i++) + fout[i].i = -fout[i].i; } - diff --git a/media/libopus/celt/kiss_fft.h b/media/libopus/celt/kiss_fft.h index 66332e3bb9..bffa2bfad6 100644 --- a/media/libopus/celt/kiss_fft.h +++ b/media/libopus/celt/kiss_fft.h @@ -32,6 +32,7 @@ #include #include #include "arch.h" +#include "cpu_support.h" #ifdef __cplusplus extern "C" { @@ -77,17 +78,28 @@ typedef struct { 4*4*4*2 */ +typedef struct arch_fft_state{ + int is_supported; + void *priv; +} arch_fft_state; + typedef struct kiss_fft_state{ int nfft; -#ifndef FIXED_POINT - kiss_fft_scalar scale; + opus_val16 scale; +#ifdef FIXED_POINT + int scale_shift; #endif int shift; opus_int16 factors[2*MAXFACTORS]; const opus_int16 *bitrev; const kiss_twiddle_cpx *twiddles; + arch_fft_state *arch_fft; } kiss_fft_state; +#if defined(HAVE_ARM_NE10) +#include "arm/fft_arm.h" +#endif + /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ /** @@ -113,9 +125,9 @@ typedef struct kiss_fft_state{ * buffer size in *lenmem. * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); /** * opus_fft(cfg,in_out_buf) @@ -127,10 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); * Note that each element is complex and can be accessed like f[k].r and f[k].i * */ -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_fft_free(const kiss_fft_state *cfg); +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); + +void opus_fft_free(const kiss_fft_state *cfg, int arch); + + +void opus_fft_free_arch_c(kiss_fft_state *st); +int opus_fft_alloc_arch_c(kiss_fft_state *st); + +#if !defined(OVERRIDE_OPUS_FFT) +/* Is run-time CPU detection enabled on this platform? */ +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) + +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); + +#define opus_fft_alloc_arch(_st, arch) \ + ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); +#define opus_fft_free_arch(_st, arch) \ + ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) + +extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_ifft(_cfg, _fin, _fout, arch) \ + ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) + +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arch_c(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arch_c(_st)) + +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) + +#define opus_ifft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) + +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ #ifdef __cplusplus } diff --git a/media/libopus/celt/mdct.c b/media/libopus/celt/mdct.c index 90a214ad0e..5315ad11a3 100644 --- a/media/libopus/celt/mdct.c +++ b/media/libopus/celt/mdct.c @@ -53,76 +53,100 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined(MIPSr1_ASM) +#include "mips/mdct_mipsr1.h" +#endif + + #ifdef CUSTOM_MODES -int clt_mdct_init(mdct_lookup *l,int N, int maxshift) +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) { int i; - int N4; kiss_twiddle_scalar *trig; -#if defined(FIXED_POINT) + int shift; int N2=N>>1; -#endif l->n = N; - N4 = N>>2; l->maxshift = maxshift; for (i=0;i<=maxshift;i++) { if (i==0) - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); + l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); else - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); + l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); #ifndef ENABLE_TI_DSPLIB55 if (l->kfft[i]==NULL) return 0; #endif } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return 0; - /* We have enough points that sine isn't necessary */ + for (shift=0;shift<=maxshift;shift++) + { + /* We have enough points that sine isn't necessary */ #if defined(FIXED_POINT) - for (i=0;i<=N4;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); +#if 1 + for (i=0;i>= 1; + N >>= 1; + } return 1; } -void clt_mdct_clear(mdct_lookup *l) +void clt_mdct_clear(mdct_lookup *l, int arch) { int i; for (i=0;i<=l->maxshift;i++) - opus_fft_free(l->kfft[i]); + opus_fft_free(l->kfft[i], arch); opus_free((kiss_twiddle_scalar*)l->trig); } #endif /* CUSTOM_MODES */ /* Forward MDCT trashes the input array */ -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride) +#ifndef OVERRIDE_clt_mdct_forward +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride, int arch) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_scalar, f2); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif SAVE_STACK; + (void)arch; + scale = st->scale; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; + ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif + ALLOC(f2, N4, kiss_fft_cpx); /* Consider the input to be composed of four blocks: [a, b, c, d] */ /* Window, shuffle, fold */ @@ -167,123 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar /* Pre-rotation */ { kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; for(i=0;ibitrev[i]] = yc; } } - /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; /* Temp pointers to make it really clear to the compiler what we're doing */ for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; yp1 += 2*stride; yp2 -= 2*stride; } } RESTORE_STACK; } +#endif /* OVERRIDE_clt_mdct_forward */ -void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) +#ifndef OVERRIDE_clt_mdct_backward +void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; - VARDECL(kiss_fft_scalar, f2); - SAVE_STACK; + const kiss_twiddle_scalar *trig; + (void) arch; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif /* Pre-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; for(i=0;ikfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */ for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i< +#include "celt.h" +#include "pitch.h" +#include "bands.h" +#include "modes.h" +#include "entcode.h" +#include "quant_bands.h" +#include "rate.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "float_cast.h" +#include +#include "celt_lpc.h" +#include "vq.h" + +#define OVERRIDE_comb_filter +void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap, int arch) +{ + int i; + opus_val32 x0, x1, x2, x3, x4; + + (void)arch; + + /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ + opus_val16 g00, g01, g02, g10, g11, g12; + static const opus_val16 gains[3][3] = { + {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, + {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, + {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; + + if (g0==0 && g1==0) + { + /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y, x, N); + return; + } + + g00 = MULT16_16_P15(g0, gains[tapset0][0]); + g01 = MULT16_16_P15(g0, gains[tapset0][1]); + g02 = MULT16_16_P15(g0, gains[tapset0][2]); + g10 = MULT16_16_P15(g1, gains[tapset1][0]); + g11 = MULT16_16_P15(g1, gains[tapset1][1]); + g12 = MULT16_16_P15(g1, gains[tapset1][2]); + x1 = x[-T1+1]; + x2 = x[-T1 ]; + x3 = x[-T1-1]; + x4 = x[-T1-2]; + /* If the filter didn't change, we don't need the overlap */ + if (g0==g1 && T0==T1 && tapset0==tapset1) + overlap=0; + + for (i=0;itwiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif + + tw=st->twiddles; + + for (i=0;ir += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); + + scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); + scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); + + scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); + scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + + +#endif /* KISS_FFT_MIPSR1_H */ diff --git a/media/libopus/celt/mips/mdct_mipsr1.h b/media/libopus/celt/mips/mdct_mipsr1.h new file mode 100644 index 0000000000..2934dab776 --- /dev/null +++ b/media/libopus/celt/mips/mdct_mipsr1.h @@ -0,0 +1,288 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2008 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* This is a simple MDCT implementation that uses a N/4 complex FFT + to do most of the work. It should be relatively straightforward to + plug in pretty much and FFT here. + + This replaces the Vorbis FFT (and uses the exact same API), which + was a bit too messy and that was ending up duplicating code + (might as well use the same FFT everywhere). + + The algorithm is similar to (and inspired from) Fabrice Bellard's + MDCT implementation in FFMPEG, but has differences in signs, ordering + and scaling in many places. +*/ +#ifndef __MDCT_MIPSR1_H__ +#define __MDCT_MIPSR1_H__ + +#ifndef SKIP_CONFIG_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#endif + +#include "mdct.h" +#include "kiss_fft.h" +#include "_kiss_fft_guts.h" +#include +#include "os_support.h" +#include "mathops.h" +#include "stack_alloc.h" + +/* Forward MDCT trashes the input array */ +#define OVERRIDE_clt_mdct_forward +void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif + + (void)arch; + + SAVE_STACK; + scale = st->scale; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N4, kiss_fft_cpx); + + /* Consider the input to be composed of four blocks: [a, b, c, d] */ + /* Window, shuffle, fold */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; + for(i=0;i<((overlap+3)>>2);i++) + { + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2); + *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + wp1 = window; + wp2 = window+overlap-1; + for(;i>2);i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = *xp2; + *yp++ = *xp1; + xp1+=2; + xp2-=2; + } + for(;ibitrev[i]] = yc; + } + } + + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); + + /* Post-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); + const kiss_twiddle_scalar *t = &trig[0]; + /* Temp pointers to make it really clear to the compiler what we're doing */ + for(i=0;ii,t[N4+i] , fp->r,t[i]); + yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; + yp1 += 2*stride; + yp2 -= 2*stride; + } + } + RESTORE_STACK; +} + +#define OVERRIDE_clt_mdct_backward +void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + const kiss_twiddle_scalar *trig; + + (void)arch; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + /* Pre-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; + for(i=0;ikfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); + + /* Post-rotate and de-shuffle from both ends of the buffer at once to make + it in-place. */ + { + kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; + /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the + middle pair will be computed twice. */ + for(i=0;i<(N4+1)>>1;i++) + { + kiss_fft_scalar re, im, yr, yi; + kiss_twiddle_scalar t0, t1; + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp0[1]; + im = yp0[0]; + t0 = t[i]; + t1 = t[N4+i]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL_ADD(re,t0 , im,t1); + yi = S_MUL_SUB(re,t1 , im,t0); + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp1[1]; + im = yp1[0]; + yp0[0] = yr; + yp1[1] = yi; + + t0 = t[(N4-i-1)]; + t1 = t[(N2-i-1)]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL_ADD(re,t0,im,t1); + yi = S_MUL_SUB(re,t1,im,t0); + yp1[0] = yr; + yp0[1] = yi; + yp0 += 2; + yp1 -= 2; + } + } + + /* Mirror on both sides for TDAC */ + { + kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + const opus_val16 * OPUS_RESTRICT wp1 = window; + const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; + + for(i = 0; i < overlap/2; i++) + { + kiss_fft_scalar x1, x2; + x1 = *xp1; + x2 = *yp1; + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); + wp1++; + wp2--; + } + } +} +#endif /* __MDCT_MIPSR1_H__ */ diff --git a/media/libopus/celt/mips/pitch_mipsr1.h b/media/libopus/celt/mips/pitch_mipsr1.h new file mode 100644 index 0000000000..a9500aff58 --- /dev/null +++ b/media/libopus/celt/mips/pitch_mipsr1.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file pitch.h + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PITCH_MIPSR1_H +#define PITCH_MIPSR1_H + +#define OVERRIDE_DUAL_INNER_PROD +static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2, int arch) +{ + int j; + opus_val32 xy01=0; + opus_val32 xy02=0; + + (void)arch; + + asm volatile("MULT $ac1, $0, $0"); + asm volatile("MULT $ac2, $0, $0"); + /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ + for (j=0;j=0;i--) + { + celt_norm x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); + } +} + +#define OVERRIDE_renormalise_vector + +#define renormalise_vector(X, N, gain, arch) \ + (renormalise_vector_mips(X, N, gain, arch)) + +void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) +{ + int i; +#ifdef FIXED_POINT + int k; +#endif + opus_val32 E = EPSILON; + opus_val16 g; + opus_val32 t; + celt_norm *xptr = X; + int X0, X1; + + (void)arch; + + asm volatile("mult $ac1, $0, $0"); + asm volatile("MTLO %0, $ac1" : :"r" (E)); + /*if(N %4) + printf("error");*/ + for (i=0;i>1; +#endif + t = VSHR32(E, 2*(k-7)); + g = MULT16_16_P15(celt_rsqrt_norm(t),gain); + + xptr = X; + for (i=0;imaxLM); if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, - mode->maxLM) == 0) + mode->maxLM, arch) == 0) goto failure; if (error) @@ -408,6 +410,8 @@ failure: #ifdef CUSTOM_MODES void opus_custom_mode_destroy(CELTMode *mode) { + int arch = opus_select_arch(); + if (mode == NULL) return; #ifndef CUSTOM_MODES_ONLY @@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode) opus_free((opus_int16*)mode->cache.index); opus_free((unsigned char*)mode->cache.bits); opus_free((unsigned char*)mode->cache.caps); - clt_mdct_clear(&mode->mdct); + clt_mdct_clear(&mode->mdct, arch); opus_free((CELTMode *)mode); } diff --git a/media/libopus/celt/modes.h b/media/libopus/celt/modes.h index c8340f9875..be813ccc8b 100644 --- a/media/libopus/celt/modes.h +++ b/media/libopus/celt/modes.h @@ -39,14 +39,6 @@ #define MAX_PERIOD 1024 -#ifndef OVERLAP -#define OVERLAP(mode) ((mode)->overlap) -#endif - -#ifndef FRAMESIZE -#define FRAMESIZE(mode) ((mode)->mdctSize) -#endif - typedef struct { int size; const opus_int16 *index; diff --git a/media/libopus/celt/os_support.h b/media/libopus/celt/os_support.h index 5e47e3cff9..a2171971e9 100644 --- a/media/libopus/celt/os_support.h +++ b/media/libopus/celt/os_support.h @@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr) } #endif -/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */ +/** Copy n elements from src to dst. The 0* term provides compile-time type checking */ #ifndef OVERRIDE_OPUS_COPY #define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) #endif -/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term +/** Copy n elements from src to dst, allowing overlapping regions. The 0* term provides compile-time type checking */ #ifndef OVERRIDE_OPUS_MOVE #define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) #endif -/** Set n elements of dst to zero, starting at address s */ +/** Set n elements of dst to zero */ #ifndef OVERRIDE_OPUS_CLEAR #define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) #endif diff --git a/media/libopus/celt/pitch.c b/media/libopus/celt/pitch.c index d2b305441d..1d89cb0342 100644 --- a/media/libopus/celt/pitch.c +++ b/media/libopus/celt/pitch.c @@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); } -#if 0 /* This is a simple version of the pitch correlation that should work - well on DSPs like Blackfin and TI C5x/C6x */ - +/* Pure C implementation. */ #ifdef FIXED_POINT opus_val32 #else void #endif -celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) +#if defined(OVERRIDE_PITCH_XCORR) +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) +#else +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch, int arch) +#endif { + +#if 0 /* This is a simple version of the pitch correlation that should work + well on DSPs like Blackfin and TI C5x/C6x */ int i, j; #ifdef FIXED_POINT opus_val32 maxcorr=1; +#endif +#if !defined(OVERRIDE_PITCH_XCORR) + (void)arch; #endif for (i=0;i0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i>1;i++) { - opus_val32 sum=0; + opus_val32 sum; xcorr[i] = 0; if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) continue; +#ifdef FIXED_POINT + sum = 0; for (j=0;j>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); +#else + sum = celt_inner_prod_c(x_lp, y+i, len>>1); +#endif xcorr[i] = MAX32(-1, sum); #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -401,7 +414,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0_, int prev_period, opus_val16 prev_gain) + int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) { int k, i, T, T0; opus_val16 g, g0; @@ -426,7 +439,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T = T0 = *T0_; ALLOC(yy_lookup, maxperiod+1, opus_val32); - dual_inner_prod(x, x, x-T0, N, &xx, &xy); + dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch); yy_lookup[0] = xx; yy=xx; for (i=1;i<=maxperiod;i++) @@ -456,7 +469,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 g1; opus_val16 cont=0; opus_val16 thresh; - T1 = (2*T0+k)/(2*k); + T1 = celt_udiv(2*T0+k, 2*k); if (T1 < minperiod) break; /* Look for another strong correlation at T1b */ @@ -468,9 +481,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = T0+T1; } else { - T1b = (2*second_check[k]*T0+k)/(2*k); + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } - dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); + dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); xy += xy2; yy = yy_lookup[T1] + yy_lookup[T1b]; #ifdef FIXED_POINT @@ -513,13 +526,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, pg = SHR32(frac_div32(best_xy,best_yy+1),16); for (k=0;k<3;k++) - { - int T1 = T+k-1; - xy = 0; - for (i=0;i MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) offset = 1; else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) diff --git a/media/libopus/celt/pitch.h b/media/libopus/celt/pitch.h index df317ecc1d..65a77a6ecc 100644 --- a/media/libopus/celt/pitch.h +++ b/media/libopus/celt/pitch.h @@ -37,11 +37,17 @@ #include "modes.h" #include "cpu_support.h" -#if defined(__SSE__) && !defined(FIXED_POINT) +#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \ + || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) #include "x86/pitch_sse.h" #endif -#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) +#if defined(MIPSr1_ASM) +#include "mips/pitch_mipsr1.h" +#endif + +#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ + || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) # include "arm/pitch_arm.h" #endif @@ -52,12 +58,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR int len, int max_pitch, int *pitch, int arch); opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0, int prev_period, opus_val16 prev_gain); + int N, int *T0, int prev_period, opus_val16 prev_gain, int arch); + /* OPT: This is the kernel you really want to optimize. It gets used a lot by the prefilter and by the PLC. */ -#ifndef OVERRIDE_XCORR_KERNEL -static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) { int j; opus_val16 y_0, y_1, y_2, y_3; @@ -122,10 +128,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, sum[3] = MAC16_16(sum[3],tmp,y_1); } } + +#ifndef OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)(arch),xcorr_kernel_c(x, y, sum, len)) #endif /* OVERRIDE_XCORR_KERNEL */ -#ifndef OVERRIDE_DUAL_INNER_PROD -static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + +static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2) { int i; @@ -139,8 +149,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y *xy1 = xy01; *xy2 = xy02; } + +#ifndef OVERRIDE_DUAL_INNER_PROD +# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ + ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2)) #endif +/*We make sure a C version is always available for cases where the overhead of + vectorization and passing around an arch flag aren't worth it.*/ +static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x, + const opus_val16 *y, int N) +{ + int i; + opus_val32 xy=0; + for (i=0;inbEBands, opus_val16); diff --git a/media/libopus/celt/rate.c b/media/libopus/celt/rate.c index 1055e63d60..4192120ce8 100644 --- a/media/libopus/celt/rate.c +++ b/media/libopus/celt/rate.c @@ -131,7 +131,7 @@ void compute_pulse_cache(CELTMode *m, int LM) for (i=0;ieBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); band_width = m->eBands[codedBands]-m->eBands[j]; @@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /* Allocate the remaining bits */ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; for (j=start;jeBands[j+1]-m->eBands[j])); @@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, offset += NClogN>>3; /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<>BITRES; /* Make sure not to bust */ if (C*ebits[j] > (bits[j]>>BITRES)) diff --git a/media/libopus/celt/rate.h b/media/libopus/celt/rate.h index f1e0661129..515f7687ce 100644 --- a/media/libopus/celt/rate.h +++ b/media/libopus/celt/rate.h @@ -32,7 +32,7 @@ #define MAX_PSEUDO 40 #define LOG_MAX_PSEUDO 6 -#define MAX_PULSES 128 +#define CELT_MAX_PULSES 128 #define MAX_FINE_BITS 8 diff --git a/media/libopus/celt/stack_alloc.h b/media/libopus/celt/stack_alloc.h index 316a6ce12c..2b51c8d80c 100644 --- a/media/libopus/celt/stack_alloc.h +++ b/media/libopus/celt/stack_alloc.h @@ -116,9 +116,11 @@ #else #ifdef CELT_C +char *scratch_ptr=0; char *global_stack=0; #else extern char *global_stack; +extern char *scratch_ptr; #endif /* CELT_C */ #ifdef ENABLE_VALGRIND @@ -140,8 +142,12 @@ extern char *global_stack_top; #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#if 0 /* Set this to 1 to instrument pseudostack usage */ +#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) +#else #define RESTORE_STACK (global_stack = _saved_stack) -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; +#endif +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ diff --git a/media/libopus/celt/static_modes_fixed.h b/media/libopus/celt/static_modes_fixed.h index 216df9e605..8717d626cb 100644 --- a/media/libopus/celt/static_modes_fixed.h +++ b/media/libopus/celt/static_modes_fixed.h @@ -4,6 +4,11 @@ #include "modes.h" #include "rate.h" +#ifdef HAVE_ARM_NE10 +#define OVERRIDE_FFT 1 +#include "static_modes_fixed_arm_ne10.h" +#endif + #ifndef DEF_WINDOW120 #define DEF_WINDOW120 static const opus_val16 window120[120] = { @@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -426,10 +431,17 @@ static const opus_int16 fft_bitrev60[60] = { #define FFT_STATE48000_960_0 static const kiss_fft_state fft_state48000_960_0 = { 480, /* nfft */ +17476, /* scale */ +8, /* scale_shift */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_480, +#else +NULL, +#endif }; #endif @@ -437,10 +449,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_1 static const kiss_fft_state fft_state48000_960_1 = { 240, /* nfft */ +17476, /* scale */ +7, /* scale_shift */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_240, +#else +NULL, +#endif }; #endif @@ -448,10 +467,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_2 static const kiss_fft_state fft_state48000_960_2 = { 120, /* nfft */ +17476, /* scale */ +6, /* scale_shift */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_120, +#else +NULL, +#endif }; #endif @@ -459,10 +485,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_3 static const kiss_fft_state fft_state48000_960_3 = { 60, /* nfft */ +17476, /* scale */ +5, /* scale_shift */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_60, +#else +NULL, +#endif }; #endif @@ -470,104 +503,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] = { -32767, 32767, 32767, 32767, 32766, -32763, 32762, 32759, 32757, 32753, -32751, 32747, 32743, 32738, 32733, -32729, 32724, 32717, 32711, 32705, -32698, 32690, 32683, 32676, 32667, -32658, 32650, 32640, 32631, 32620, -32610, 32599, 32588, 32577, 32566, -32554, 32541, 32528, 32515, 32502, -32487, 32474, 32459, 32444, 32429, -32413, 32397, 32381, 32364, 32348, -32331, 32313, 32294, 32277, 32257, -32239, 32219, 32200, 32180, 32159, -32138, 32118, 32096, 32074, 32051, -32029, 32006, 31984, 31960, 31936, -31912, 31888, 31863, 31837, 31812, -31786, 31760, 31734, 31707, 31679, -31652, 31624, 31596, 31567, 31539, -31508, 31479, 31450, 31419, 31388, -31357, 31326, 31294, 31262, 31230, -31198, 31164, 31131, 31097, 31063, -31030, 30994, 30959, 30924, 30889, -30853, 30816, 30779, 30743, 30705, -30668, 30629, 30592, 30553, 30515, -30475, 30435, 30396, 30356, 30315, -30274, 30233, 30191, 30149, 30107, -30065, 30022, 29979, 29936, 29891, -29847, 29803, 29758, 29713, 29668, -29622, 29577, 29529, 29483, 29436, -29390, 29341, 29293, 29246, 29197, -29148, 29098, 29050, 29000, 28949, -28899, 28848, 28797, 28746, 28694, -28642, 28590, 28537, 28485, 28432, -28378, 28324, 28271, 28217, 28162, -28106, 28051, 27995, 27940, 27884, -27827, 27770, 27713, 27657, 27598, -27540, 27481, 27423, 27365, 27305, -27246, 27187, 27126, 27066, 27006, -26945, 26883, 26822, 26760, 26698, -26636, 26574, 26510, 26448, 26383, -26320, 26257, 26191, 26127, 26062, -25997, 25931, 25866, 25800, 25734, -25667, 25601, 25533, 25466, 25398, -25330, 25262, 25194, 25125, 25056, -24987, 24917, 24848, 24778, 24707, -24636, 24566, 24495, 24424, 24352, -24280, 24208, 24135, 24063, 23990, -23917, 23842, 23769, 23695, 23622, -23546, 23472, 23398, 23322, 23246, -23171, 23095, 23018, 22942, 22866, -22788, 22711, 22634, 22557, 22478, -22400, 22322, 22244, 22165, 22085, -22006, 21927, 21846, 21766, 21687, -21606, 21524, 21443, 21363, 21282, -21199, 21118, 21035, 20954, 20870, -20788, 20705, 20621, 20538, 20455, -20371, 20286, 20202, 20118, 20034, -19947, 19863, 19777, 19692, 19606, -19520, 19434, 19347, 19260, 19174, -19088, 18999, 18911, 18825, 18737, -18648, 18560, 18472, 18384, 18294, -18205, 18116, 18025, 17936, 17846, -17757, 17666, 17576, 17485, 17395, -17303, 17212, 17122, 17030, 16937, -16846, 16755, 16662, 16569, 16477, -16385, 16291, 16198, 16105, 16012, -15917, 15824, 15730, 15636, 15541, -15447, 15352, 15257, 15162, 15067, -14973, 14875, 14781, 14685, 14589, -14493, 14396, 14300, 14204, 14107, -14010, 13914, 13815, 13718, 13621, -13524, 13425, 13328, 13230, 13133, -13033, 12935, 12836, 12738, 12638, -12540, 12441, 12341, 12241, 12142, -12044, 11943, 11843, 11744, 11643, -11542, 11442, 11342, 11241, 11139, -11039, 10939, 10836, 10736, 10635, -10534, 10431, 10330, 10228, 10127, -10024, 9921, 9820, 9718, 9614, -9512, 9410, 9306, 9204, 9101, -8998, 8895, 8791, 8689, 8585, -8481, 8377, 8274, 8171, 8067, -7962, 7858, 7753, 7650, 7545, -7441, 7336, 7231, 7129, 7023, -6917, 6813, 6709, 6604, 6498, -6393, 6288, 6182, 6077, 5973, -5867, 5760, 5656, 5549, 5445, -5339, 5232, 5127, 5022, 4914, -4809, 4703, 4596, 4490, 4384, -4278, 4171, 4065, 3958, 3852, -3745, 3640, 3532, 3426, 3318, -3212, 3106, 2998, 2891, 2786, -2679, 2570, 2465, 2358, 2251, -2143, 2037, 1929, 1823, 1715, -1609, 1501, 1393, 1287, 1180, -1073, 964, 858, 751, 644, -535, 429, 322, 214, 107, -0, }; +static const opus_val16 mdct_twiddles960[1800] = { +32767, 32767, 32767, 32766, 32765, +32763, 32761, 32759, 32756, 32753, +32750, 32746, 32742, 32738, 32733, +32728, 32722, 32717, 32710, 32704, +32697, 32690, 32682, 32674, 32666, +32657, 32648, 32639, 32629, 32619, +32609, 32598, 32587, 32576, 32564, +32552, 32539, 32526, 32513, 32500, +32486, 32472, 32457, 32442, 32427, +32411, 32395, 32379, 32362, 32345, +32328, 32310, 32292, 32274, 32255, +32236, 32217, 32197, 32177, 32157, +32136, 32115, 32093, 32071, 32049, +32027, 32004, 31981, 31957, 31933, +31909, 31884, 31859, 31834, 31809, +31783, 31756, 31730, 31703, 31676, +31648, 31620, 31592, 31563, 31534, +31505, 31475, 31445, 31415, 31384, +31353, 31322, 31290, 31258, 31226, +31193, 31160, 31127, 31093, 31059, +31025, 30990, 30955, 30920, 30884, +30848, 30812, 30775, 30738, 30701, +30663, 30625, 30587, 30548, 30509, +30470, 30430, 30390, 30350, 30309, +30269, 30227, 30186, 30144, 30102, +30059, 30016, 29973, 29930, 29886, +29842, 29797, 29752, 29707, 29662, +29616, 29570, 29524, 29477, 29430, +29383, 29335, 29287, 29239, 29190, +29142, 29092, 29043, 28993, 28943, +28892, 28842, 28791, 28739, 28688, +28636, 28583, 28531, 28478, 28425, +28371, 28317, 28263, 28209, 28154, +28099, 28044, 27988, 27932, 27876, +27820, 27763, 27706, 27648, 27591, +27533, 27474, 27416, 27357, 27298, +27238, 27178, 27118, 27058, 26997, +26936, 26875, 26814, 26752, 26690, +26628, 26565, 26502, 26439, 26375, +26312, 26247, 26183, 26119, 26054, +25988, 25923, 25857, 25791, 25725, +25658, 25592, 25524, 25457, 25389, +25322, 25253, 25185, 25116, 25047, +24978, 24908, 24838, 24768, 24698, +24627, 24557, 24485, 24414, 24342, +24270, 24198, 24126, 24053, 23980, +23907, 23834, 23760, 23686, 23612, +23537, 23462, 23387, 23312, 23237, +23161, 23085, 23009, 22932, 22856, +22779, 22701, 22624, 22546, 22468, +22390, 22312, 22233, 22154, 22075, +21996, 21916, 21836, 21756, 21676, +21595, 21515, 21434, 21352, 21271, +21189, 21107, 21025, 20943, 20860, +20777, 20694, 20611, 20528, 20444, +20360, 20276, 20192, 20107, 20022, +19937, 19852, 19767, 19681, 19595, +19509, 19423, 19336, 19250, 19163, +19076, 18988, 18901, 18813, 18725, +18637, 18549, 18460, 18372, 18283, +18194, 18104, 18015, 17925, 17835, +17745, 17655, 17565, 17474, 17383, +17292, 17201, 17110, 17018, 16927, +16835, 16743, 16650, 16558, 16465, +16372, 16279, 16186, 16093, 15999, +15906, 15812, 15718, 15624, 15529, +15435, 15340, 15245, 15150, 15055, +14960, 14864, 14769, 14673, 14577, +14481, 14385, 14288, 14192, 14095, +13998, 13901, 13804, 13706, 13609, +13511, 13414, 13316, 13218, 13119, +13021, 12923, 12824, 12725, 12626, +12527, 12428, 12329, 12230, 12130, +12030, 11930, 11831, 11730, 11630, +11530, 11430, 11329, 11228, 11128, +11027, 10926, 10824, 10723, 10622, +10520, 10419, 10317, 10215, 10113, +10011, 9909, 9807, 9704, 9602, +9499, 9397, 9294, 9191, 9088, +8985, 8882, 8778, 8675, 8572, +8468, 8364, 8261, 8157, 8053, +7949, 7845, 7741, 7637, 7532, +7428, 7323, 7219, 7114, 7009, +6905, 6800, 6695, 6590, 6485, +6380, 6274, 6169, 6064, 5958, +5853, 5747, 5642, 5536, 5430, +5325, 5219, 5113, 5007, 4901, +4795, 4689, 4583, 4476, 4370, +4264, 4157, 4051, 3945, 3838, +3732, 3625, 3518, 3412, 3305, +3198, 3092, 2985, 2878, 2771, +2664, 2558, 2451, 2344, 2237, +2130, 2023, 1916, 1809, 1702, +1594, 1487, 1380, 1273, 1166, +1059, 952, 844, 737, 630, +523, 416, 308, 201, 94, +-13, -121, -228, -335, -442, +-550, -657, -764, -871, -978, +-1086, -1193, -1300, -1407, -1514, +-1621, -1728, -1835, -1942, -2049, +-2157, -2263, -2370, -2477, -2584, +-2691, -2798, -2905, -3012, -3118, +-3225, -3332, -3439, -3545, -3652, +-3758, -3865, -3971, -4078, -4184, +-4290, -4397, -4503, -4609, -4715, +-4821, -4927, -5033, -5139, -5245, +-5351, -5457, -5562, -5668, -5774, +-5879, -5985, -6090, -6195, -6301, +-6406, -6511, -6616, -6721, -6826, +-6931, -7036, -7140, -7245, -7349, +-7454, -7558, -7663, -7767, -7871, +-7975, -8079, -8183, -8287, -8390, +-8494, -8597, -8701, -8804, -8907, +-9011, -9114, -9217, -9319, -9422, +-9525, -9627, -9730, -9832, -9934, +-10037, -10139, -10241, -10342, -10444, +-10546, -10647, -10748, -10850, -10951, +-11052, -11153, -11253, -11354, -11455, +-11555, -11655, -11756, -11856, -11955, +-12055, -12155, -12254, -12354, -12453, +-12552, -12651, -12750, -12849, -12947, +-13046, -13144, -13242, -13340, -13438, +-13536, -13633, -13731, -13828, -13925, +-14022, -14119, -14216, -14312, -14409, +-14505, -14601, -14697, -14793, -14888, +-14984, -15079, -15174, -15269, -15364, +-15459, -15553, -15647, -15741, -15835, +-15929, -16023, -16116, -16210, -16303, +-16396, -16488, -16581, -16673, -16766, +-16858, -16949, -17041, -17133, -17224, +-17315, -17406, -17497, -17587, -17678, +-17768, -17858, -17948, -18037, -18127, +-18216, -18305, -18394, -18483, -18571, +-18659, -18747, -18835, -18923, -19010, +-19098, -19185, -19271, -19358, -19444, +-19531, -19617, -19702, -19788, -19873, +-19959, -20043, -20128, -20213, -20297, +-20381, -20465, -20549, -20632, -20715, +-20798, -20881, -20963, -21046, -21128, +-21210, -21291, -21373, -21454, -21535, +-21616, -21696, -21776, -21856, -21936, +-22016, -22095, -22174, -22253, -22331, +-22410, -22488, -22566, -22643, -22721, +-22798, -22875, -22951, -23028, -23104, +-23180, -23256, -23331, -23406, -23481, +-23556, -23630, -23704, -23778, -23852, +-23925, -23998, -24071, -24144, -24216, +-24288, -24360, -24432, -24503, -24574, +-24645, -24716, -24786, -24856, -24926, +-24995, -25064, -25133, -25202, -25270, +-25339, -25406, -25474, -25541, -25608, +-25675, -25742, -25808, -25874, -25939, +-26005, -26070, -26135, -26199, -26264, +-26327, -26391, -26455, -26518, -26581, +-26643, -26705, -26767, -26829, -26891, +-26952, -27013, -27073, -27133, -27193, +-27253, -27312, -27372, -27430, -27489, +-27547, -27605, -27663, -27720, -27777, +-27834, -27890, -27946, -28002, -28058, +-28113, -28168, -28223, -28277, -28331, +-28385, -28438, -28491, -28544, -28596, +-28649, -28701, -28752, -28803, -28854, +-28905, -28955, -29006, -29055, -29105, +-29154, -29203, -29251, -29299, -29347, +-29395, -29442, -29489, -29535, -29582, +-29628, -29673, -29719, -29764, -29808, +-29853, -29897, -29941, -29984, -30027, +-30070, -30112, -30154, -30196, -30238, +-30279, -30320, -30360, -30400, -30440, +-30480, -30519, -30558, -30596, -30635, +-30672, -30710, -30747, -30784, -30821, +-30857, -30893, -30929, -30964, -30999, +-31033, -31068, -31102, -31135, -31168, +-31201, -31234, -31266, -31298, -31330, +-31361, -31392, -31422, -31453, -31483, +-31512, -31541, -31570, -31599, -31627, +-31655, -31682, -31710, -31737, -31763, +-31789, -31815, -31841, -31866, -31891, +-31915, -31939, -31963, -31986, -32010, +-32032, -32055, -32077, -32099, -32120, +-32141, -32162, -32182, -32202, -32222, +-32241, -32260, -32279, -32297, -32315, +-32333, -32350, -32367, -32383, -32399, +-32415, -32431, -32446, -32461, -32475, +-32489, -32503, -32517, -32530, -32542, +-32555, -32567, -32579, -32590, -32601, +-32612, -32622, -32632, -32641, -32651, +-32659, -32668, -32676, -32684, -32692, +-32699, -32706, -32712, -32718, -32724, +-32729, -32734, -32739, -32743, -32747, +-32751, -32754, -32757, -32760, -32762, +-32764, -32765, -32767, -32767, -32767, +32767, 32767, 32765, 32761, 32756, +32750, 32742, 32732, 32722, 32710, +32696, 32681, 32665, 32647, 32628, +32608, 32586, 32562, 32538, 32512, +32484, 32455, 32425, 32393, 32360, +32326, 32290, 32253, 32214, 32174, +32133, 32090, 32046, 32001, 31954, +31906, 31856, 31805, 31753, 31700, +31645, 31588, 31530, 31471, 31411, +31349, 31286, 31222, 31156, 31089, +31020, 30951, 30880, 30807, 30733, +30658, 30582, 30504, 30425, 30345, +30263, 30181, 30096, 30011, 29924, +29836, 29747, 29656, 29564, 29471, +29377, 29281, 29184, 29086, 28987, +28886, 28784, 28681, 28577, 28471, +28365, 28257, 28147, 28037, 27925, +27812, 27698, 27583, 27467, 27349, +27231, 27111, 26990, 26868, 26744, +26620, 26494, 26367, 26239, 26110, +25980, 25849, 25717, 25583, 25449, +25313, 25176, 25038, 24900, 24760, +24619, 24477, 24333, 24189, 24044, +23898, 23751, 23602, 23453, 23303, +23152, 22999, 22846, 22692, 22537, +22380, 22223, 22065, 21906, 21746, +21585, 21423, 21261, 21097, 20933, +20767, 20601, 20434, 20265, 20096, +19927, 19756, 19584, 19412, 19239, +19065, 18890, 18714, 18538, 18361, +18183, 18004, 17824, 17644, 17463, +17281, 17098, 16915, 16731, 16546, +16361, 16175, 15988, 15800, 15612, +15423, 15234, 15043, 14852, 14661, +14469, 14276, 14083, 13889, 13694, +13499, 13303, 13107, 12910, 12713, +12515, 12317, 12118, 11918, 11718, +11517, 11316, 11115, 10913, 10710, +10508, 10304, 10100, 9896, 9691, +9486, 9281, 9075, 8869, 8662, +8455, 8248, 8040, 7832, 7623, +7415, 7206, 6996, 6787, 6577, +6366, 6156, 5945, 5734, 5523, +5311, 5100, 4888, 4675, 4463, +4251, 4038, 3825, 3612, 3399, +3185, 2972, 2758, 2544, 2330, +2116, 1902, 1688, 1474, 1260, +1045, 831, 617, 402, 188, +-27, -241, -456, -670, -885, +-1099, -1313, -1528, -1742, -1956, +-2170, -2384, -2598, -2811, -3025, +-3239, -3452, -3665, -3878, -4091, +-4304, -4516, -4728, -4941, -5153, +-5364, -5576, -5787, -5998, -6209, +-6419, -6629, -6839, -7049, -7258, +-7467, -7676, -7884, -8092, -8300, +-8507, -8714, -8920, -9127, -9332, +-9538, -9743, -9947, -10151, -10355, +-10558, -10761, -10963, -11165, -11367, +-11568, -11768, -11968, -12167, -12366, +-12565, -12762, -12960, -13156, -13352, +-13548, -13743, -13937, -14131, -14324, +-14517, -14709, -14900, -15091, -15281, +-15470, -15659, -15847, -16035, -16221, +-16407, -16593, -16777, -16961, -17144, +-17326, -17508, -17689, -17869, -18049, +-18227, -18405, -18582, -18758, -18934, +-19108, -19282, -19455, -19627, -19799, +-19969, -20139, -20308, -20475, -20642, +-20809, -20974, -21138, -21301, -21464, +-21626, -21786, -21946, -22105, -22263, +-22420, -22575, -22730, -22884, -23037, +-23189, -23340, -23490, -23640, -23788, +-23935, -24080, -24225, -24369, -24512, +-24654, -24795, -24934, -25073, -25211, +-25347, -25482, -25617, -25750, -25882, +-26013, -26143, -26272, -26399, -26526, +-26651, -26775, -26898, -27020, -27141, +-27260, -27379, -27496, -27612, -27727, +-27841, -27953, -28065, -28175, -28284, +-28391, -28498, -28603, -28707, -28810, +-28911, -29012, -29111, -29209, -29305, +-29401, -29495, -29587, -29679, -29769, +-29858, -29946, -30032, -30118, -30201, +-30284, -30365, -30445, -30524, -30601, +-30677, -30752, -30825, -30897, -30968, +-31038, -31106, -31172, -31238, -31302, +-31365, -31426, -31486, -31545, -31602, +-31658, -31713, -31766, -31818, -31869, +-31918, -31966, -32012, -32058, -32101, +-32144, -32185, -32224, -32262, -32299, +-32335, -32369, -32401, -32433, -32463, +-32491, -32518, -32544, -32568, -32591, +-32613, -32633, -32652, -32669, -32685, +-32700, -32713, -32724, -32735, -32744, +-32751, -32757, -32762, -32766, -32767, +32767, 32764, 32755, 32741, 32720, +32694, 32663, 32626, 32583, 32535, +32481, 32421, 32356, 32286, 32209, +32128, 32041, 31948, 31850, 31747, +31638, 31523, 31403, 31278, 31148, +31012, 30871, 30724, 30572, 30415, +30253, 30086, 29913, 29736, 29553, +29365, 29172, 28974, 28771, 28564, +28351, 28134, 27911, 27684, 27452, +27216, 26975, 26729, 26478, 26223, +25964, 25700, 25432, 25159, 24882, +24601, 24315, 24026, 23732, 23434, +23133, 22827, 22517, 22204, 21886, +21565, 21240, 20912, 20580, 20244, +19905, 19563, 19217, 18868, 18516, +18160, 17802, 17440, 17075, 16708, +16338, 15964, 15588, 15210, 14829, +14445, 14059, 13670, 13279, 12886, +12490, 12093, 11693, 11291, 10888, +10482, 10075, 9666, 9255, 8843, +8429, 8014, 7597, 7180, 6760, +6340, 5919, 5496, 5073, 4649, +4224, 3798, 3372, 2945, 2517, +2090, 1661, 1233, 804, 375, +-54, -483, -911, -1340, -1768, +-2197, -2624, -3052, -3479, -3905, +-4330, -4755, -5179, -5602, -6024, +-6445, -6865, -7284, -7702, -8118, +-8533, -8946, -9358, -9768, -10177, +-10584, -10989, -11392, -11793, -12192, +-12589, -12984, -13377, -13767, -14155, +-14541, -14924, -15305, -15683, -16058, +-16430, -16800, -17167, -17531, -17892, +-18249, -18604, -18956, -19304, -19649, +-19990, -20329, -20663, -20994, -21322, +-21646, -21966, -22282, -22595, -22904, +-23208, -23509, -23806, -24099, -24387, +-24672, -24952, -25228, -25499, -25766, +-26029, -26288, -26541, -26791, -27035, +-27275, -27511, -27741, -27967, -28188, +-28405, -28616, -28823, -29024, -29221, +-29412, -29599, -29780, -29957, -30128, +-30294, -30455, -30611, -30761, -30906, +-31046, -31181, -31310, -31434, -31552, +-31665, -31773, -31875, -31972, -32063, +-32149, -32229, -32304, -32373, -32437, +-32495, -32547, -32594, -32635, -32671, +-32701, -32726, -32745, -32758, -32766, +32767, 32754, 32717, 32658, 32577, +32473, 32348, 32200, 32029, 31837, +31624, 31388, 31131, 30853, 30553, +30232, 29891, 29530, 29148, 28746, +28324, 27883, 27423, 26944, 26447, +25931, 25398, 24847, 24279, 23695, +23095, 22478, 21846, 21199, 20538, +19863, 19174, 18472, 17757, 17030, +16291, 15541, 14781, 14010, 13230, +12441, 11643, 10837, 10024, 9204, +8377, 7545, 6708, 5866, 5020, +4171, 3319, 2464, 1608, 751, +-107, -965, -1822, -2678, -3532, +-4383, -5232, -6077, -6918, -7754, +-8585, -9409, -10228, -11039, -11843, +-12639, -13426, -14204, -14972, -15730, +-16477, -17213, -17937, -18648, -19347, +-20033, -20705, -21363, -22006, -22634, +-23246, -23843, -24423, -24986, -25533, +-26062, -26573, -27066, -27540, -27995, +-28431, -28848, -29245, -29622, -29979, +-30315, -30630, -30924, -31197, -31449, +-31679, -31887, -32074, -32239, -32381, +-32501, -32600, -32675, -32729, -32759, +}; #endif static const CELTMode mode48000_960_120 = { diff --git a/media/libopus/celt/static_modes_fixed_arm_ne10.h b/media/libopus/celt/static_modes_fixed_arm_ne10.h new file mode 100644 index 0000000000..b8ef0cee98 --- /dev/null +++ b/media/libopus/celt/static_modes_fixed_arm_ne10.h @@ -0,0 +1,388 @@ +/* The contents of this file was automatically generated by + * dump_mode_arm_ne10.c with arguments: 48000 960 + * It contains static definitions for some pre-defined modes. */ +#include + +#ifndef NE10_FFT_PARAMS48000_960 +#define NE10_FFT_PARAMS48000_960 +static const ne10_int32_t ne10_factors_480[64] = { +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_240[64] = { +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_120[64] = { +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_60[64] = { +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, +{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, +{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, +{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, +{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, +{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, +{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, +{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, +{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, +{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, +{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, +{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, +{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, +{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, +{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, +{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570}, +{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154}, +{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172}, +{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068}, +{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822}, +{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146}, +{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682}, +{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231}, +{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791}, +{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029}, +{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313}, +{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783}, +{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661}, +{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541}, +{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450}, +{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914}, +{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194}, +{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807}, +{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067}, +{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658}, +{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677}, +{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704}, +{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277}, +{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512}, +{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510}, +{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682}, +{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424}, +{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524}, +{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195}, +{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164}, +{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771}, +{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961}, +{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705}, +{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540}, +{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994}, +{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540}, +{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646}, +{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814}, +{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593}, +{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667}, +{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, +{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, +{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, +{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, +{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, +{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, +{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, +{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, +{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, +{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, +{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, +{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, +{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, +{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, +{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, +{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, +{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, +{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, +{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, +{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, +{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584}, +{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622}, +{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955}, +{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651}, +{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703}, +{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114}, +{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330}, +{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403}, +{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078}, +{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372}, +{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738}, +{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248}, +{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400}, +{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552}, +{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141}, +{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553}, +{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377}, +{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448}, +{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240}, +{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574}, +{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630}, +{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288}, +{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960}, +{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934}, +{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097}, +{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189}, +{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282}, +{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280}, +{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875}, +{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603}, +{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339}, +{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386}, +{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674}, +{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010}, +{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564}, +{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860}, +{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118}, +{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768}, +{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839}, +{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595}, +{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001}, +{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837}, +{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918}, +{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354}, +{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119}, +{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555}, +{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188}, +{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928}, +{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484}, +{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706}, +{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252}, +{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149}, +{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287}, +{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850}, +{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059}, +{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202}, +{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458}, +{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823}, +{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542}, +{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, +{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, +{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, +{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, +{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, +{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, +{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, +{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, +{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, +{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, +{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, +{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, +{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, +{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, +{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, +{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, +{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, +{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, +{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, +{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, +{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, +{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, +{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, +{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, +{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496}, +{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855}, +{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883}, +{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330}, +{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268}, +{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035}, +{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910}, +{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276}, +{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785}, +{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298}, +{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465}, +{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435}, +{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248}, +{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368}, +{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912}, +{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536}, +{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647}, +{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493}, +{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827}, +{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413}, +{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096}, +{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084}, +{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406}, +{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881}, +{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895}, +{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257}, +{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506}, +{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107}, +{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239}, +{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, +{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, +{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, +{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, +{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, +{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, +{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, +{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, +{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, +{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, +{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, +{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, +{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, +{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, +{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, +{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, +{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, +{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, +{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, +}; +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = { +120, +(ne10_int32_t *)ne10_factors_480, +(ne10_fft_cpx_int32_t *)ne10_twiddles_480, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120], +}; +static const arch_fft_state cfg_arch_480 = { +1, +(void *)&ne10_fft_state_int32_t_480, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = { +60, +(ne10_int32_t *)ne10_factors_240, +(ne10_fft_cpx_int32_t *)ne10_twiddles_240, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60], +}; +static const arch_fft_state cfg_arch_240 = { +1, +(void *)&ne10_fft_state_int32_t_240, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = { +30, +(ne10_int32_t *)ne10_factors_120, +(ne10_fft_cpx_int32_t *)ne10_twiddles_120, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30], +}; +static const arch_fft_state cfg_arch_120 = { +1, +(void *)&ne10_fft_state_int32_t_120, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = { +15, +(ne10_int32_t *)ne10_factors_60, +(ne10_fft_cpx_int32_t *)ne10_twiddles_60, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15], +}; +static const arch_fft_state cfg_arch_60 = { +1, +(void *)&ne10_fft_state_int32_t_60, +}; + +#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/media/libopus/celt/static_modes_float.h b/media/libopus/celt/static_modes_float.h index 5d7e7b8e68..e102a38391 100644 --- a/media/libopus/celt/static_modes_float.h +++ b/media/libopus/celt/static_modes_float.h @@ -4,6 +4,11 @@ #include "modes.h" #include "rate.h" +#ifdef HAVE_ARM_NE10 +#define OVERRIDE_FFT 1 +#include "static_modes_float_arm_ne10.h" +#endif + #ifndef DEF_WINDOW120 #define DEF_WINDOW120 static const opus_val16 window120[120] = { @@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -428,9 +433,14 @@ static const kiss_fft_state fft_state48000_960_0 = { 480, /* nfft */ 0.002083333f, /* scale */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_480, +#else +NULL, +#endif }; #endif @@ -440,9 +450,14 @@ static const kiss_fft_state fft_state48000_960_1 = { 240, /* nfft */ 0.004166667f, /* scale */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_240, +#else +NULL, +#endif }; #endif @@ -452,9 +467,14 @@ static const kiss_fft_state fft_state48000_960_2 = { 120, /* nfft */ 0.008333333f, /* scale */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_120, +#else +NULL, +#endif }; #endif @@ -464,9 +484,14 @@ static const kiss_fft_state fft_state48000_960_3 = { 60, /* nfft */ 0.016666667f, /* scale */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_60, +#else +NULL, +#endif }; #endif @@ -474,104 +499,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] = { -1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f, -0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f, -0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f, -0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f, -0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f, -0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f, -0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f, -0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f, -0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f, -0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f, -0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f, -0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f, -0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f, -0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f, -0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f, -0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f, -0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f, -0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f, -0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f, -0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f, -0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f, -0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f, -0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f, -0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f, -0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f, -0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f, -0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f, -0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f, -0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f, -0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f, -0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f, -0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f, -0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f, -0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f, -0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f, -0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f, -0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f, -0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f, -0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f, -0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f, -0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f, -0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f, -0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f, -0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f, -0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f, -0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f, -0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f, -0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f, -0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f, -0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f, -0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f, -0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f, -0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f, -0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f, -0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f, -0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f, -0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f, -0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f, -0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f, -0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f, -0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f, -0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f, -0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f, -0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f, -0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f, -0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f, -0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f, -0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f, -0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f, -0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f, -0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f, -0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f, -0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f, -0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f, -0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f, -0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f, -0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f, -0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f, -0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f, -0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f, -0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f, -0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f, -0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f, -0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f, -0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f, -0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f, -0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f, -0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f, -0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f, -0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f, -0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f, -0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f, -0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f, -0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f, -0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f, -0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f, --4.3711390e-08f, }; +static const opus_val16 mdct_twiddles960[1800] = { +0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f, +0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f, +0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f, +0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f, +0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f, +0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f, +0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f, +0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f, +0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f, +0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f, +0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f, +0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f, +0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f, +0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f, +0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f, +0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f, +0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f, +0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f, +0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f, +0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f, +0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f, +0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f, +0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f, +0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f, +0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f, +0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f, +0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f, +0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f, +0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f, +0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f, +0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f, +0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f, +0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f, +0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f, +0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f, +0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f, +0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f, +0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f, +0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f, +0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f, +0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f, +0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f, +0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f, +0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f, +0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f, +0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f, +0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f, +0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f, +0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f, +0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f, +0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f, +0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f, +0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f, +0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f, +0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f, +0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f, +0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f, +0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f, +0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f, +0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f, +0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f, +0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f, +0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f, +0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f, +0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f, +0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f, +0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f, +0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f, +0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f, +0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f, +0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f, +0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f, +0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f, +0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f, +0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f, +0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f, +0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f, +0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f, +0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f, +0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f, +0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f, +0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f, +0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f, +0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f, +0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f, +0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f, +0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f, +0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f, +0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f, +0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f, +0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f, +0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f, +0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f, +0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f, +0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f, +0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f, +-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f, +-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f, +-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f, +-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f, +-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f, +-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f, +-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f, +-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f, +-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f, +-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f, +-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f, +-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f, +-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f, +-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f, +-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f, +-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f, +-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f, +-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f, +-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f, +-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f, +-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f, +-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f, +-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f, +-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f, +-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f, +-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f, +-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f, +-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f, +-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f, +-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f, +-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f, +-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f, +-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f, +-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f, +-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f, +-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f, +-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f, +-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f, +-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f, +-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f, +-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f, +-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f, +-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f, +-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f, +-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f, +-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f, +-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f, +-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f, +-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f, +-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f, +-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f, +-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f, +-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f, +-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f, +-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f, +-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f, +-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f, +-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f, +-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f, +-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f, +-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f, +-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f, +-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f, +-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f, +-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f, +-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f, +-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f, +-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f, +-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f, +-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f, +-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f, +-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f, +-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f, +-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f, +-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f, +-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f, +-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f, +-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f, +-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f, +-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f, +-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f, +-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f, +-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f, +-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f, +-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f, +-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f, +-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f, +-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f, +-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f, +-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f, +-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f, +-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f, +-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f, +-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f, +-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f, +-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f, +0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f, +0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f, +0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f, +0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f, +0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f, +0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f, +0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f, +0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f, +0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f, +0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f, +0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f, +0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f, +0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f, +0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f, +0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f, +0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f, +0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f, +0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f, +0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f, +0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f, +0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f, +0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f, +0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f, +0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f, +0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f, +0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f, +0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f, +0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f, +0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f, +0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f, +0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f, +0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f, +0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f, +0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f, +0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f, +0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f, +0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f, +0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f, +0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f, +0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f, +0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f, +0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f, +0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f, +0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f, +0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f, +0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f, +0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f, +0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f, +-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f, +-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f, +-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f, +-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f, +-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f, +-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f, +-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f, +-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f, +-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f, +-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f, +-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f, +-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f, +-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f, +-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f, +-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f, +-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f, +-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f, +-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f, +-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f, +-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f, +-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f, +-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f, +-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f, +-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f, +-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f, +-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f, +-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f, +-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f, +-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f, +-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f, +-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f, +-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f, +-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f, +-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f, +-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f, +-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f, +-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f, +-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f, +-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f, +-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f, +-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f, +-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f, +-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f, +-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f, +-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f, +-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f, +-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f, +-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f, +0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f, +0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f, +0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f, +0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f, +0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f, +0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f, +0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f, +0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f, +0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f, +0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f, +0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f, +0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f, +0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f, +0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f, +0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f, +0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f, +0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f, +0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f, +0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f, +0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f, +0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f, +0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f, +0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f, +0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f, +-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f, +-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f, +-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f, +-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f, +-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f, +-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f, +-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f, +-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f, +-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f, +-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f, +-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f, +-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f, +-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f, +-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f, +-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f, +-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f, +-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f, +-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f, +-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f, +-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f, +-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f, +-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f, +-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f, +-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f, +0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f, +0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f, +0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f, +0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f, +0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f, +0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f, +0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f, +0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f, +0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f, +0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f, +0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f, +0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f, +-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f, +-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f, +-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f, +-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f, +-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f, +-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f, +-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f, +-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f, +-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f, +-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f, +-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f, +-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f, +}; #endif static const CELTMode mode48000_960_120 = { diff --git a/media/libopus/celt/static_modes_float_arm_ne10.h b/media/libopus/celt/static_modes_float_arm_ne10.h new file mode 100644 index 0000000000..934a82a420 --- /dev/null +++ b/media/libopus/celt/static_modes_float_arm_ne10.h @@ -0,0 +1,404 @@ +/* The contents of this file was automatically generated by + * dump_mode_arm_ne10.c with arguments: 48000 960 + * It contains static definitions for some pre-defined modes. */ +#include + +#ifndef NE10_FFT_PARAMS48000_960 +#define NE10_FFT_PARAMS48000_960 +static const ne10_int32_t ne10_factors_480[64] = { +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_240[64] = { +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_120[64] = { +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_60[64] = { +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f}, +{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f}, +{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f}, +{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f}, +{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f}, +{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f}, +{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f}, +{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f}, +{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f}, +{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f}, +{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f}, +{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f}, +{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f}, +{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f}, +{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f}, +{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f}, +{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f}, +{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f}, +{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f}, +{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f}, +{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f}, +{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f}, +{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f}, +{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f}, +{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f}, +{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f}, +{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f}, +{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f}, +{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f}, +{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f}, +{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f}, +{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f}, +{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f}, +{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f}, +{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f}, +{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f}, +{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f}, +{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f}, +{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f}, +{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f}, +{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f}, +{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f}, +{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f}, +{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f}, +{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f}, +{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f}, +{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f}, +{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f}, +{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f}, +{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f}, +{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f}, +{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f}, +{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f}, +{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f}, +{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f}, +{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f}, +{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f}, +{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f}, +{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f}, +{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f}, +{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f}, +{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f}, +{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f}, +{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f}, +{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f}, +{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f}, +{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f}, +{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f}, +{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f}, +{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f}, +{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f}, +{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f}, +{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f}, +{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f}, +{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f}, +{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f}, +{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f}, +{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f}, +{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f}, +{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f}, +{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f}, +{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f}, +{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f}, +{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f}, +{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f}, +{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f}, +{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f}, +{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f}, +{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f}, +{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f}, +{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f}, +{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f}, +{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f}, +{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f}, +{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f}, +{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f}, +{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f}, +{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f}, +{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f}, +{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f}, +{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f}, +{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f}, +{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f}, +{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f}, +{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f}, +{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f}, +{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f}, +{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f}, +{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f}, +{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f}, +{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f}, +{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f}, +{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f}, +{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f}, +{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f}, +{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f}, +{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f}, +{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f}, +{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f}, +{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f}, +{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f}, +{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f}, +{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f}, +{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f}, +{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f}, +{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f}, +{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f}, +{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +}; +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = { +120, +(ne10_int32_t *)ne10_factors_480, +(ne10_fft_cpx_float32_t *)ne10_twiddles_480, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_480 = { +1, +(void *)&ne10_fft_state_float32_t_480, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = { +60, +(ne10_int32_t *)ne10_factors_240, +(ne10_fft_cpx_float32_t *)ne10_twiddles_240, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_240 = { +1, +(void *)&ne10_fft_state_float32_t_240, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = { +30, +(ne10_int32_t *)ne10_factors_120, +(ne10_fft_cpx_float32_t *)ne10_twiddles_120, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_120 = { +1, +(void *)&ne10_fft_state_float32_t_120, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = { +15, +(ne10_int32_t *)ne10_factors_60, +(ne10_fft_cpx_float32_t *)ne10_twiddles_60, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_60 = { +1, +(void *)&ne10_fft_state_float32_t_60, +}; + +#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/media/libopus/celt/vq.c b/media/libopus/celt/vq.c index 98a0f36c9f..f358396065 100644 --- a/media/libopus/celt/vq.c +++ b/media/libopus/celt/vq.c @@ -37,19 +37,23 @@ #include "os_support.h" #include "bands.h" #include "rate.h" +#include "pitch.h" +#ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { int i; + opus_val16 ms; celt_norm *Xptr; Xptr = X; + ms = NEG16(s); for (i=0;i=0;i--) @@ -57,10 +61,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } } +#endif /* OVERRIDE_vq_exp_rotation1 */ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { @@ -91,7 +96,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int } /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for extract_collapse_mask().*/ - len /= stride; + len = celt_udiv(len, stride); for (i=0;i0, "alg_unquant() needs at least one pulse"); celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); ALLOC(iy, N, int); - decode_pulses(iy, N, K, dec); - Ryy = 0; - i=0; - do { - Ryy = MAC16_16(Ryy, iy[i], iy[i]); - } while (++i < N); + Ryy = decode_pulses(iy, N, K, dec); normalise_residual(iy, X, N, Ryy, gain); exp_rotation(X, N, -1, B, K, spread); collapse_mask = extract_collapse_mask(iy, N, B); @@ -344,21 +345,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, return collapse_mask; } -void renormalise_vector(celt_norm *X, int N, opus_val16 gain) +#ifndef OVERRIDE_renormalise_vector +void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) { int i; #ifdef FIXED_POINT int k; #endif - opus_val32 E = EPSILON; + opus_val32 E; opus_val16 g; opus_val32 t; - celt_norm *xptr = X; - for (i=0;i>1; #endif @@ -373,8 +371,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) } /*return celt_sqrt(E);*/ } +#endif /* OVERRIDE_renormalise_vector */ -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) { int i; int itheta; @@ -393,14 +392,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) Eside = MAC16_16(Eside, s, s); } } else { - for (i=0;i +#include +#include +#include "celt_lpc.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "pitch.h" +#include "x86cpu.h" + +#if defined(FIXED_POINT) + +void celt_fir_sse4_1(const opus_val16 *_x, + const opus_val16 *num, + opus_val16 *_y, + int N, + int ord, + opus_val16 *mem, + int arch) +{ + int i,j; + VARDECL(opus_val16, rnum); + VARDECL(opus_val16, x); + + __m128i vecNoA; + opus_int32 noA ; + SAVE_STACK; + + ALLOC(rnum, ord, opus_val16); + ALLOC(x, N+ord, opus_val16); + for(i=0;i> 1; + vecNoA = _mm_set_epi32(noA, noA, noA, noA); + + for (i=0;i +#include "arch.h" + +void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) +{ + int j; + __m128 xsum1, xsum2; + xsum1 = _mm_loadu_ps(sum); + xsum2 = _mm_setzero_ps(); + + for (j = 0; j < len-3; j += 4) + { + __m128 x0 = _mm_loadu_ps(x+j); + __m128 yj = _mm_loadu_ps(y+j); + __m128 y3 = _mm_loadu_ps(y+j+3); + + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), + _mm_shuffle_ps(yj,y3,0x49))); + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), + _mm_shuffle_ps(yj,y3,0x9e))); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); + } + if (j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + } + } + } + _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); +} + + +void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2) +{ + int i; + __m128 xsum1, xsum2; + xsum1 = _mm_setzero_ps(); + xsum2 = _mm_setzero_ps(); + for (i=0;i -#include "arch.h" +#if defined(HAVE_CONFIG_H) +#include "config.h" +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) +void xcorr_kernel_sse4_1( + const opus_int16 *x, + const opus_int16 *y, + opus_val32 sum[4], + int len); +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) +void xcorr_kernel_sse( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); +#endif + +#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) +#define OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len)) + +#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) +#define OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)arch, xcorr_kernel_sse(x, y, sum, len)) + +#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) + +extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); #define OVERRIDE_XCORR_KERNEL -static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) -{ - int j; - __m128 xsum1, xsum2; - xsum1 = _mm_loadu_ps(sum); - xsum2 = _mm_setzero_ps(); +#define xcorr_kernel(x, y, sum, len, arch) \ + ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) - for (j = 0; j < len-3; j += 4) - { - __m128 x0 = _mm_loadu_ps(x+j); - __m128 yj = _mm_loadu_ps(y+j); - __m128 y3 = _mm_loadu_ps(y+j+3); +#endif - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), - _mm_shuffle_ps(yj,y3,0x49))); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), - _mm_shuffle_ps(yj,y3,0x9e))); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); - } - if (j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - } - } - } - _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); -} +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) +opus_val32 celt_inner_prod_sse4_1( + const opus_int16 *x, + const opus_int16 *y, + int N); +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) +opus_val32 celt_inner_prod_sse2( + const opus_int16 *x, + const opus_int16 *y, + int N); +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT) +opus_val32 celt_inner_prod_sse( + const opus_val16 *x, + const opus_val16 *y, + int N); +#endif + + +#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) +#define OVERRIDE_CELT_INNER_PROD +#define celt_inner_prod(x, y, N, arch) \ + ((void)arch, celt_inner_prod_sse4_1(x, y, N)) + +#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) +#define OVERRIDE_CELT_INNER_PROD +#define celt_inner_prod(x, y, N, arch) \ + ((void)arch, celt_inner_prod_sse2(x, y, N)) + +#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) +#define OVERRIDE_CELT_INNER_PROD +#define celt_inner_prod(x, y, N, arch) \ + ((void)arch, celt_inner_prod_sse(x, y, N)) + + +#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) + +extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + int N); + +#define OVERRIDE_CELT_INNER_PROD +#define celt_inner_prod(x, y, N, arch) \ + ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N)) + +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) #define OVERRIDE_DUAL_INNER_PROD -static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - __m128 xsum1, xsum2; - xsum1 = _mm_setzero_ps(); - xsum2 = _mm_setzero_ps(); - for (i=0;i +#include + +#include "macros.h" +#include "celt_lpc.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "pitch.h" + +#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) +opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, + int N) +{ + opus_int i, dataSize16; + opus_int32 sum; + + __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; + __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; + + sum = 0; + dataSize16 = N & ~15; + + acc1 = _mm_setzero_si128(); + acc2 = _mm_setzero_si128(); + + for (i=0;i= 8) + { + inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); + inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); + + inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); + + acc1 = _mm_add_epi32(acc1, inVec1_76543210); + i += 8; + } + + acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); + acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); + sum += _mm_cvtsi128_si32(acc1); + + for (;i +#include + +#include "macros.h" +#include "celt_lpc.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "pitch.h" + +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) +#include +#include "x86cpu.h" + +opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y, + int N) +{ + opus_int i, dataSize16; + opus_int32 sum; + __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; + __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; + __m128i inVec1_3210, inVec2_3210; + + sum = 0; + dataSize16 = N & ~15; + + acc1 = _mm_setzero_si128(); + acc2 = _mm_setzero_si128(); + + for (i=0;i= 8) + { + inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); + inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); + + inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); + + acc1 = _mm_add_epi32(acc1, inVec1_76543210); + i += 8; + } + + if (N - i >= 4) + { + inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]); + inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]); + + inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210); + + acc1 = _mm_add_epi32(acc1, inVec1_3210); + i += 4; + } + + acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); + acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); + + sum += _mm_cvtsi128_si32(acc1); + + for (;i= 3); + + sum0 = _mm_setzero_si128(); + sum1 = _mm_setzero_si128(); + sum2 = _mm_setzero_si128(); + sum3 = _mm_setzero_si128(); + + for (j=0;j<(len-7);j+=8) + { + vecX = _mm_loadu_si128((__m128i *)(&x[j + 0])); + vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0])); + vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1])); + vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2])); + vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3])); + + sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); + sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); + sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); + sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3)); + } + + sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0)); + sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E)); + + sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1)); + sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E)); + + sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2)); + sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E)); + + sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3)); + sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E)); + + vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1), + _mm_unpacklo_epi32(sum2, sum3)); + + for (;j<(len-3);j+=4) + { + vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); + vecX0 = _mm_shuffle_epi32(vecX, 0x00); + vecX1 = _mm_shuffle_epi32(vecX, 0x55); + vecX2 = _mm_shuffle_epi32(vecX, 0xaa); + vecX3 = _mm_shuffle_epi32(vecX, 0xff); + + vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); + vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); + vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]); + vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]); + + sum0 = _mm_mullo_epi32(vecX0, vecY0); + sum1 = _mm_mullo_epi32(vecX1, vecY1); + sum2 = _mm_mullo_epi32(vecX2, vecY2); + sum3 = _mm_mullo_epi32(vecX3, vecY3); + + sum0 = _mm_add_epi32(sum0, sum1); + sum2 = _mm_add_epi32(sum2, sum3); + vecSum = _mm_add_epi32(vecSum, sum0); + vecSum = _mm_add_epi32(vecSum, sum2); + } + + for (;j +static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) +{ + __cpuid((int*)CPUInfo, InfoType); +} + +#else + +#if defined(CPU_INFO_BY_C) +#include +#endif + +static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) +{ +#if defined(CPU_INFO_BY_ASM) +#if defined(__i386__) && defined(__PIC__) +/* %ebx is PIC register in 32-bit, so mustn't clobber it. */ + __asm__ __volatile__ ( + "xchg %%ebx, %1\n" + "cpuid\n" + "xchg %%ebx, %1\n": + "=a" (CPUInfo[0]), + "=r" (CPUInfo[1]), + "=c" (CPUInfo[2]), + "=d" (CPUInfo[3]) : + "0" (InfoType) + ); +#else + __asm__ __volatile__ ( + "cpuid": + "=a" (CPUInfo[0]), + "=b" (CPUInfo[1]), + "=c" (CPUInfo[2]), + "=d" (CPUInfo[3]) : + "0" (InfoType) + ); +#endif +#elif defined(CPU_INFO_BY_C) + __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])); +#endif +} + +#endif + +typedef struct CPU_Feature{ + /* SIMD: 128-bit */ + int HW_SSE; + int HW_SSE2; + int HW_SSE41; + /* SIMD: 256-bit */ + int HW_AVX; +} CPU_Feature; + +static void opus_cpu_feature_check(CPU_Feature *cpu_feature) +{ + unsigned int info[4] = {0}; + unsigned int nIds = 0; + + cpuid(info, 0); + nIds = info[0]; + + if (nIds >= 1){ + cpuid(info, 1); + cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; + cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; + cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; + cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; + } + else { + cpu_feature->HW_SSE = 0; + cpu_feature->HW_SSE2 = 0; + cpu_feature->HW_SSE41 = 0; + cpu_feature->HW_AVX = 0; + } +} + +int opus_select_arch(void) +{ + CPU_Feature cpu_feature; + int arch; + + opus_cpu_feature_check(&cpu_feature); + + arch = 0; + if (!cpu_feature.HW_SSE) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_SSE2) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_SSE41) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_AVX) + { + return arch; + } + arch++; + + return arch; +} + +#endif diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h new file mode 100644 index 0000000000..04fd48aac4 --- /dev/null +++ b/media/libopus/celt/x86/x86cpu.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(X86CPU_H) +# define X86CPU_H + +# if defined(OPUS_X86_MAY_HAVE_SSE) +# define MAY_HAVE_SSE(name) name ## _sse +# else +# define MAY_HAVE_SSE(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# define MAY_HAVE_SSE2(name) name ## _sse2 +# else +# define MAY_HAVE_SSE2(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 +# else +# define MAY_HAVE_SSE4_1(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_AVX) +# define MAY_HAVE_AVX(name) name ## _avx +# else +# define MAY_HAVE_AVX(name) name ## _c +# endif + +# if defined(OPUS_HAVE_RTCD) +int opus_select_arch(void); +# endif + +/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() + or _mm_cvtepi16_epi32() when optimizations are disabled, even though the + actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory + reference, these require 16-byte alignment and load a full 16 bytes (instead + of 4 or 8), possibly reading out of bounds. + + We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or + _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 + reference in the PMOVSXWD instruction itself, but gcc is not smart enough to + optimize this out when optimizations ARE enabled. + + Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 + (which is fair, since technically the compiler is always allowed to do the + dereference before invoking the function implementing the intrinsic). + However, it is smart enough to eliminate the extra MOVD instruction. + For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out + the extra MOVQ if it's specified explicitly */ + +# if defined(__clang__) || !defined(__OPTIMIZE__) +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) +# else +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(*(__m128i *)(x))) +#endif + +# if !defined(__OPTIMIZE__) +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) +# else +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(*(__m128i *)(x))) +# endif + +#endif diff --git a/media/libopus/gen-sources.py b/media/libopus/gen-sources.py index a861607b25..00c180940b 100644 --- a/media/libopus/gen-sources.py +++ b/media/libopus/gen-sources.py @@ -26,7 +26,7 @@ def generate_sources_mozbuild(path): 'silk_sources.mk', ] - var_definition = re.compile('([A-Z_]*) = (.*)') + var_definition = re.compile('([A-Z0-9_]*) = (.*)') with open('sources.mozbuild', 'w') as output: output.write('# THIS FILE WAS AUTOMATICALLY GENERATED BY %s. DO NOT EDIT.\n' % sys.argv[0]) diff --git a/media/libopus/include/opus.h b/media/libopus/include/opus.h index 93a53a2ffc..b0bdf6f2df 100644 --- a/media/libopus/include/opus.h +++ b/media/libopus/include/opus.h @@ -616,7 +616,10 @@ OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, fl * merged. Splitting valid Opus packets is always guaranteed to succeed, * whereas merging valid packets only succeeds if all frames have the same * mode, bandwidth, and frame size, and when the total duration of the merged - * packet is no more than 120 ms. + * packet is no more than 120 ms. The 120 ms limit comes from the + * specification and limits decoder memory requirements at a point where + * framing overhead becomes negligible. + * * The repacketizer currently only operates on elementary Opus * streams. It will not manipualte multistream packets successfully, except in * the degenerate case where they consist of data from a single stream. diff --git a/media/libopus/include/opus_defines.h b/media/libopus/include/opus_defines.h index 265089f65e..647ed5d6f2 100644 --- a/media/libopus/include/opus_defines.h +++ b/media/libopus/include/opus_defines.h @@ -46,7 +46,7 @@ extern "C" { #define OPUS_OK 0 /** One or more invalid/out of range arguments @hideinitializer*/ #define OPUS_BAD_ARG -1 -/** The mode struct passed is invalid @hideinitializer*/ +/** Not enough bytes allocated in the buffer @hideinitializer*/ #define OPUS_BUFFER_TOO_SMALL -2 /** An internal error was detected @hideinitializer*/ #define OPUS_INTERNAL_ERROR -3 @@ -274,7 +274,6 @@ extern "C" { /** Enables or disables variable bitrate (VBR) in the encoder. * The configured bitrate may not be met exactly because frames must * be an integer number of bytes in length. - * @warning Only the MDCT mode of Opus can provide hard CBR behavior. * @see OPUS_GET_VBR * @see OPUS_SET_VBR_CONSTRAINT * @param[in] x opus_int32: Allowed values: @@ -454,14 +453,6 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) -/** Gets the sampling rate the encoder or decoder was initialized with. - * This simply returns the Fs value passed to opus_encoder_init() - * or opus_decoder_init(). - * @param[out] x opus_int32 *: Sampling rate of encoder or decoder. - * @hideinitializer - */ -#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) - /** Gets the total samples of delay added by the entire codec. * This can be queried by the encoder and then the provided number of samples can be * skipped on from the start of the decoder's output to provide time aligned input @@ -498,9 +489,9 @@ extern "C" { #define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x) /** Configures the encoder's expected packet loss percentage. - * Higher values with trigger progressively more loss resistant behavior in the encoder - * at the expense of quality at a given bitrate in the lossless case, but greater quality - * under loss. + * Higher values trigger progressively more loss resistant behavior in the encoder + * at the expense of quality at a given bitrate in the absence of packet loss, but + * greater quality under loss. * @see OPUS_GET_PACKET_LOSS_PERC * @param[in] x opus_int32: Loss percentage in the range 0-100, inclusive (default: 0). * @hideinitializer */ @@ -532,7 +523,19 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x) /** Configures the depth of signal being encoded. + * * This is a hint which helps the encoder identify silence and near-silence. + * It represents the number of significant bits of linear intensity below + * which the signal contains ignorable quantization or other noise. + * + * For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting + * for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate + * for 16-bit linear pcm input with opus_encode_float(). + * + * When using opus_encode() instead of opus_encode_float(), or when libopus + * is compiled for fixed-point, the encoder uses the minimum of the value + * set here and the value 16. + * * @see OPUS_GET_LSB_DEPTH * @param[in] x opus_int32: Input precision in bits, between 8 and 24 * (default: 24). @@ -545,11 +548,6 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) -/** Gets the duration (in samples) of the last packet successfully decoded or concealed. - * @param[out] x opus_int32 *: Number of samples (at current sampling rate). - * @hideinitializer */ -#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) - /** Configures the encoder's use of variable duration frames. * When variable duration is enabled, the encoder is free to use a shorter frame * size than the one requested in the opus_encode*() call. @@ -558,12 +556,12 @@ extern "C" { * packet. The part of the audio that was not encoded needs to be resent to the * encoder for the next call. Do not use this option unless you really * know what you are doing. - * @see OPUS_GET_EXPERT_VARIABLE_DURATION + * @see OPUS_GET_EXPERT_FRAME_DURATION * @param[in] x opus_int32: Allowed values: *
*
OPUS_FRAMESIZE_ARG
Select frame size from the argument (default).
*
OPUS_FRAMESIZE_2_5_MS
Use 2.5 ms frames.
- *
OPUS_FRAMESIZE_5_MS
Use 2.5 ms frames.
+ *
OPUS_FRAMESIZE_5_MS
Use 5 ms frames.
*
OPUS_FRAMESIZE_10_MS
Use 10 ms frames.
*
OPUS_FRAMESIZE_20_MS
Use 20 ms frames.
*
OPUS_FRAMESIZE_40_MS
Use 40 ms frames.
@@ -573,12 +571,12 @@ extern "C" { * @hideinitializer */ #define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) /** Gets the encoder's configured use of variable duration frames. - * @see OPUS_SET_EXPERT_VARIABLE_DURATION + * @see OPUS_SET_EXPERT_FRAME_DURATION * @param[out] x opus_int32 *: Returns one of the following values: *
*
OPUS_FRAMESIZE_ARG
Select frame size from the argument (default).
*
OPUS_FRAMESIZE_2_5_MS
Use 2.5 ms frames.
- *
OPUS_FRAMESIZE_5_MS
Use 2.5 ms frames.
+ *
OPUS_FRAMESIZE_5_MS
Use 5 ms frames.
*
OPUS_FRAMESIZE_10_MS
Use 10 ms frames.
*
OPUS_FRAMESIZE_20_MS
Use 20 ms frames.
*
OPUS_FRAMESIZE_40_MS
Use 40 ms frames.
@@ -589,10 +587,22 @@ extern "C" { #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) /** If set to 1, disables almost all use of prediction, making frames almost - completely independent. This reduces quality. (default : 0) + * completely independent. This reduces quality. + * @see OPUS_GET_PREDICTION_DISABLED + * @param[in] x opus_int32: Allowed values: + *
+ *
0
Enable prediction (default).
+ *
1
Disable prediction.
+ *
* @hideinitializer */ #define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x) /** Gets the encoder's configured prediction status. + * @see OPUS_SET_PREDICTION_DISABLED + * @param[out] x opus_int32 *: Returns one of the following values: + *
+ *
0
Prediction enabled (default).
+ *
1
Prediction disabled.
+ *
* @hideinitializer */ #define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x) @@ -649,18 +659,6 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) -/** Gets the pitch of the last decoded frame, if available. - * This can be used for any post-processing algorithm requiring the use of pitch, - * e.g. time stretching/shortening. If the last frame was not voiced, or if the - * pitch was not coded in the frame, then zero is returned. - * - * This CTL is only implemented for decoder instances. - * - * @param[out] x opus_int32 *: pitch period at 48 kHz (or 0 if not available) - * - * @hideinitializer */ -#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) - /** Gets the encoder's configured bandpass or the decoder's last bandpass. * @see OPUS_SET_BANDWIDTH * @param[out] x opus_int32 *: Returns one of the following values: @@ -675,6 +673,14 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) +/** Gets the sampling rate the encoder or decoder was initialized with. + * This simply returns the Fs value passed to opus_encoder_init() + * or opus_decoder_init(). + * @param[out] x opus_int32 *: Sampling rate of encoder or decoder. + * @hideinitializer + */ +#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) + /**@}*/ /** @defgroup opus_decoderctls Decoder related CTLs @@ -699,6 +705,23 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) +/** Gets the duration (in samples) of the last packet successfully decoded or concealed. + * @param[out] x opus_int32 *: Number of samples (at current sampling rate). + * @hideinitializer */ +#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) + +/** Gets the pitch of the last decoded frame, if available. + * This can be used for any post-processing algorithm requiring the use of pitch, + * e.g. time stretching/shortening. If the last frame was not voiced, or if the + * pitch was not coded in the frame, then zero is returned. + * + * This CTL is only implemented for decoder instances. + * + * @param[out] x opus_int32 *: pitch period at 48 kHz (or 0 if not available) + * + * @hideinitializer */ +#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) + /**@}*/ /** @defgroup opus_libinfo Opus library information functions @@ -713,6 +736,10 @@ extern "C" { OPUS_EXPORT const char *opus_strerror(int error); /** Gets the libopus version string. + * + * Applications may look for the substring "-fixed" in the version string to + * determine whether they have a fixed-point or floating-point build at + * runtime. * * @returns Version string */ diff --git a/media/libopus/include/opus_multistream.h b/media/libopus/include/opus_multistream.h index ae5997934a..47e03900bd 100644 --- a/media/libopus/include/opus_multistream.h +++ b/media/libopus/include/opus_multistream.h @@ -111,9 +111,9 @@ extern "C" { * duration, can be computed without any special negotiation. * * The format for multistream Opus packets is defined in the - * Ogg + * Ogg * encapsulation specification and is based on the self-delimited Opus - * framing described in Appendix B of RFC 6716. + * framing described in Appendix B of RFC 6716. * Normal Opus packets are just a degenerate case of multistream Opus packets, * and can be encoded or decoded with the multistream API by setting * streams to 1 when initializing the encoder or @@ -140,7 +140,7 @@ extern "C" { * * The output channels specified by the encoder * should use the - * Vorbis + * Vorbis * channel ordering. A decoder may wish to apply an additional permutation * to the mapping the encoder used to achieve a different output channel * order (e.g. for outputing in WAV order). diff --git a/media/libopus/moz.build b/media/libopus/moz.build index 90a6fa5927..9133257a93 100644 --- a/media/libopus/moz.build +++ b/media/libopus/moz.build @@ -19,7 +19,7 @@ ALLOW_COMPILER_WARNINGS = True FINAL_LIBRARY = 'gkmedias' DEFINES['OPUS_BUILD'] = True -DEFINES['OPUS_VERSION'] = '"v1.1-mozilla"' +DEFINES['OPUS_VERSION'] = '"v1.1.2-mozilla"' DEFINES['USE_ALLOCA'] = True # We only need to export symbols if we're built into libgkmedias @@ -71,9 +71,9 @@ LOCAL_INCLUDES += [ include('sources.mozbuild') UNIFIED_SOURCES += celt_sources -SOURCES += celt_nonunified_sources UNIFIED_SOURCES += silk_sources UNIFIED_SOURCES += opus_sources +SOURCES += opus_nonunified_sources if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']: LOCAL_INCLUDES += [ diff --git a/media/libopus/nonunified.patch b/media/libopus/nonunified.patch new file mode 100644 index 0000000000..cf9ff554fb --- /dev/null +++ b/media/libopus/nonunified.patch @@ -0,0 +1,38 @@ +diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild +index 8a39b9f..dfc2c62 100644 +--- a/media/libopus/sources.mozbuild ++++ b/media/libopus/sources.mozbuild +@@ -2,8 +2,6 @@ + celt_sources = [ + 'celt/bands.c', + 'celt/celt.c', +- 'celt/celt_decoder.c', +- 'celt/celt_encoder.c', + 'celt/celt_lpc.c', + 'celt/cwrs.c', + 'celt/entcode.c', +@@ -20,6 +18,15 @@ celt_sources = [ + 'celt/vq.c', + ] + ++opus_nonunified_sources = [ ++ # Disabled because of name clash of opus_custom_encoder_get_size. ++ 'celt/celt_decoder.c', ++ 'celt/celt_encoder.c', ++ # Disabled for (safe) warning about QA redefinition. ++ 'silk/LPC_inv_pred_gain.c', ++ 'silk/NLSF2A.c', ++] ++ + celt_sources_sse = [ + 'celt/x86/pitch_sse.c', + 'celt/x86/x86_celt_map.c', +@@ -105,8 +112,6 @@ silk_sources = [ + 'silk/log2lin.c', + 'silk/LP_variable_cutoff.c', + 'silk/LPC_analysis_filter.c', +- 'silk/LPC_inv_pred_gain.c', +- 'silk/NLSF2A.c', + 'silk/NLSF_decode.c', + 'silk/NLSF_del_dec_quant.c', + 'silk/NLSF_encode.c', diff --git a/media/libopus/silk/A2NLSF.c b/media/libopus/silk/A2NLSF.c index 74b1b95d6f..b6e9e5ffcc 100644 --- a/media/libopus/silk/A2NLSF.c +++ b/media/libopus/silk/A2NLSF.c @@ -71,8 +71,23 @@ static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial ev y32 = p[ dd ]; /* Q16 */ x_Q16 = silk_LSHIFT( x, 4 ); - for( n = dd - 1; n >= 0; n-- ) { - y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ + + if ( opus_likely( 8 == dd ) ) + { + y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 ); + y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 ); + } + else + { + for( n = dd - 1; n >= 0; n-- ) { + y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ + } } return y32; } diff --git a/media/libopus/silk/API.h b/media/libopus/silk/API.h index f0601bcf6b..0131acbb08 100644 --- a/media/libopus/silk/API.h +++ b/media/libopus/silk/API.h @@ -111,7 +111,8 @@ opus_int silk_Decode( /* O Returns error co opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ ec_dec *psRangeDec, /* I/O Compressor data structure */ opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut /* O Number of samples decoded */ + opus_int32 *nSamplesOut, /* O Number of samples decoded */ + int arch /* I Run-time architecture */ ); #if 0 diff --git a/media/libopus/silk/CNG.c b/media/libopus/silk/CNG.c index 8481d95dbe..61787c2302 100644 --- a/media/libopus/silk/CNG.c +++ b/media/libopus/silk/CNG.c @@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE. /* Generates excitation for CNG LPC synthesis */ static OPUS_INLINE void silk_CNG_exc( - opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ + opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ opus_int32 Gain_Q16, /* I Gain to apply */ opus_int length, /* I Length */ @@ -55,7 +55,7 @@ static OPUS_INLINE void silk_CNG_exc( idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); silk_assert( idx >= 0 ); silk_assert( idx <= CNG_BUF_MASK_MAX ); - residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); + exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); } *rand_seed = seed; } @@ -85,7 +85,7 @@ void silk_CNG( ) { opus_int i, subfr; - opus_int32 sum_Q6, max_Gain_Q16; + opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; opus_int16 A_Q12[ MAX_LPC_ORDER ]; silk_CNG_struct *psCNG = &psDec->sCNG; SAVE_STACK; @@ -125,11 +125,20 @@ void silk_CNG( /* Add CNG when packet is lost or during DTX */ if( psDec->lossCnt ) { VARDECL( opus_int32, CNG_sig_Q10 ); - ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); /* Generate CNG excitation */ - silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed ); + gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); + if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { + gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); + } else { + gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); + } + silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); /* Convert CNG NLSF to filter representation */ silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); @@ -162,7 +171,7 @@ void silk_CNG( /* Update states */ CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); - frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); + frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); } silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); } else { diff --git a/media/libopus/silk/LPC_analysis_filter.c b/media/libopus/silk/LPC_analysis_filter.c index 9d1f16cb7d..20906673ff 100644 --- a/media/libopus/silk/LPC_analysis_filter.c +++ b/media/libopus/silk/LPC_analysis_filter.c @@ -44,7 +44,8 @@ void silk_LPC_analysis_filter( const opus_int16 *in, /* I Input signal */ const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ const opus_int32 len, /* I Signal length */ - const opus_int32 d /* I Filter order */ + const opus_int32 d, /* I Filter order */ + int arch /* I Run-time architecture */ ) { opus_int j; @@ -69,11 +70,12 @@ void silk_LPC_analysis_filter( for (j=0;j 0 ) { + out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } else if( i == 0 ) { + out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } else if( i == -1 ) { + out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } else { + out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } + out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 ); + out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 ); + } + silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */ nStates = 1; @@ -73,21 +95,9 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns ind[ j ][ i ] = (opus_int8)ind_tmp; /* compute outputs for ind_tmp and ind_tmp + 1 */ - out0_Q10 = silk_LSHIFT( ind_tmp, 10 ); - out1_Q10 = silk_ADD16( out0_Q10, 1024 ); - if( ind_tmp > 0 ) { - out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( ind_tmp == 0 ) { - out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( ind_tmp == -1 ) { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } - out0_Q10 = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 ); - out1_Q10 = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 ); + out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; + out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; + out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 ); out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 ); prev_out_Q10[ j ] = out0_Q10; diff --git a/media/libopus/silk/NSQ.c b/media/libopus/silk/NSQ.c index cf5b3fd547..a065884070 100644 --- a/media/libopus/silk/NSQ.c +++ b/media/libopus/silk/NSQ.c @@ -46,6 +46,7 @@ static OPUS_INLINE void silk_nsq_scale_states( const opus_int signal_type /* I Signal type */ ); +#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) static OPUS_INLINE void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ @@ -67,8 +68,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer( opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ opus_int predictLPCOrder /* I Prediction filter order */ ); +#endif -void silk_NSQ( +void silk_NSQ_c +( const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ @@ -141,7 +144,7 @@ void silk_NSQ( silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder ); + A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); NSQ->rewhite_flag = 1; NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; @@ -172,7 +175,11 @@ void silk_NSQ( /***********************************/ /* silk_noise_shape_quantizer */ /***********************************/ -static OPUS_INLINE void silk_noise_shape_quantizer( + +#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +static OPUS_INLINE +#endif +void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ const opus_int32 x_sc_Q10[], /* I */ diff --git a/media/libopus/silk/NSQ_del_dec.c b/media/libopus/silk/NSQ_del_dec.c index 522be40663..aff560c221 100644 --- a/media/libopus/silk/NSQ_del_dec.c +++ b/media/libopus/silk/NSQ_del_dec.c @@ -57,6 +57,9 @@ typedef struct { typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; +#if defined(MIPSr1_ASM) +#include "mips/NSQ_del_dec_mipsr1.h" +#endif static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -106,7 +109,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int decisionDelay /* I */ ); -void silk_NSQ_del_dec( +void silk_NSQ_del_dec_c( const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ @@ -244,7 +247,7 @@ void silk_NSQ_del_dec( silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder ); + A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; NSQ->rewhite_flag = 1; @@ -303,6 +306,7 @@ void silk_NSQ_del_dec( /******************************************/ /* Noise shape quantizer for one subframe */ /******************************************/ +#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ @@ -629,6 +633,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( } RESTORE_STACK; } +#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */ static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ diff --git a/media/libopus/silk/PLC.c b/media/libopus/silk/PLC.c index 01f40014c4..34a94bc313 100644 --- a/media/libopus/silk/PLC.c +++ b/media/libopus/silk/PLC.c @@ -46,7 +46,8 @@ static OPUS_INLINE void silk_PLC_update( static OPUS_INLINE void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[] /* O LPC residual signal */ + opus_int16 frame[], /* O LPC residual signal */ + int arch /* I Run-time architecture */ ); @@ -65,7 +66,8 @@ void silk_PLC( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ opus_int16 frame[], /* I/O signal */ - opus_int lost /* I Loss flag */ + opus_int lost, /* I Loss flag */ + int arch /* I Run-time architecture */ ) { /* PLC control function */ @@ -78,7 +80,7 @@ void silk_PLC( /****************************/ /* Generate Signal */ /****************************/ - silk_PLC_conceal( psDec, psDecCtrl, frame ); + silk_PLC_conceal( psDec, psDecCtrl, frame, arch ); psDec->lossCnt++; } else { @@ -165,10 +167,35 @@ static OPUS_INLINE void silk_PLC_update( psPLC->nb_subfr = psDec->nb_subfr; } +static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, + const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) +{ + int i, k; + VARDECL( opus_int16, exc_buf ); + opus_int16 *exc_buf_ptr; + SAVE_STACK; + ALLOC( exc_buf, 2*subfr_length, opus_int16 ); + /* Find random noise component */ + /* Scale previous excitation signal */ + exc_buf_ptr = exc_buf; + for( k = 0; k < 2; k++ ) { + for( i = 0; i < subfr_length; i++ ) { + exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( + silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); + } + exc_buf_ptr += subfr_length; + } + /* Find the subframe with lowest energy of the last two and use that as random noise generator */ + silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); + silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); + RESTORE_STACK; +} + static OPUS_INLINE void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[] /* O LPC residual signal */ + opus_int16 frame[], /* O LPC residual signal */ + int arch /* I Run-time architecture */ ) { opus_int i, j, k; @@ -177,19 +204,26 @@ static OPUS_INLINE void silk_PLC_conceal( opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; opus_int32 LPC_pred_Q10, LTP_pred_Q12; opus_int16 rand_scale_Q14; - opus_int16 *B_Q14, *exc_buf_ptr; + opus_int16 *B_Q14; opus_int32 *sLPC_Q14_ptr; - VARDECL( opus_int16, exc_buf ); opus_int16 A_Q12[ MAX_LPC_ORDER ]; +#ifdef SMALL_FOOTPRINT + opus_int16 *sLTP; +#else VARDECL( opus_int16, sLTP ); +#endif VARDECL( opus_int32, sLTP_Q14 ); silk_PLC_struct *psPLC = &psDec->sPLC; opus_int32 prevGain_Q10[2]; SAVE_STACK; - ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); +#ifdef SMALL_FOOTPRINT + /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */ + sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; +#else + ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); +#endif prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); @@ -198,19 +232,7 @@ static OPUS_INLINE void silk_PLC_conceal( silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); } - /* Find random noise component */ - /* Scale previous excitation signal */ - exc_buf_ptr = exc_buf; - for( k = 0; k < 2; k++ ) { - for( i = 0; i < psPLC->subfr_length; i++ ) { - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( - silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) ); - } - exc_buf_ptr += psPLC->subfr_length; - } - /* Find the subframe with lowest energy of the last two and use that as random noise generator */ - silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); - silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); + silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { /* First sub-frame has lowest energy */ @@ -270,7 +292,7 @@ static OPUS_INLINE void silk_PLC_conceal( /* Rewhiten LTP state */ idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; silk_assert( idx > 0 ); - silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order ); + silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch ); /* Scale LTP state */ inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 ); inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 ); diff --git a/media/libopus/silk/PLC.h b/media/libopus/silk/PLC.h index f1e2eccc69..6438f51633 100644 --- a/media/libopus/silk/PLC.h +++ b/media/libopus/silk/PLC.h @@ -48,7 +48,8 @@ void silk_PLC( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ opus_int16 frame[], /* I/O signal */ - opus_int lost /* I Loss flag */ + opus_int lost, /* I Loss flag */ + int arch /* I Run-time architecture */ ); void silk_PLC_glue_frames( diff --git a/media/libopus/silk/SigProc_FIX.h b/media/libopus/silk/SigProc_FIX.h index 1b58057910..b63299441e 100644 --- a/media/libopus/silk/SigProc_FIX.h +++ b/media/libopus/silk/SigProc_FIX.h @@ -41,7 +41,11 @@ extern "C" #include "typedef.h" #include "resampler_structs.h" #include "macros.h" +#include "cpu_support.h" +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) +#include "x86/SigProc_FIX_sse.h" +#endif /********************************************************************/ /* SIGNAL PROCESSING FUNCTIONS */ @@ -108,7 +112,8 @@ void silk_LPC_analysis_filter( const opus_int16 *in, /* I Input signal */ const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ const opus_int32 len, /* I Signal length */ - const opus_int32 d /* I Filter order */ + const opus_int32 d, /* I Filter order */ + int arch /* I Run-time architecture */ ); /* Chirp (bandwidth expand) LP AR filter */ @@ -303,7 +308,7 @@ void silk_NLSF_VQ_weights_laroia( ); /* Compute reflection coefficients from input signal */ -void silk_burg_modified( +void silk_burg_modified_c( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ @@ -335,12 +340,15 @@ void silk_scale_vector32_Q26_lshift_18( /********************************************************************/ /* return sum( inVec1[i] * inVec2[i] ) */ + opus_int32 silk_inner_prod_aligned( const opus_int16 *const inVec1, /* I input vector 1 */ const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ + const opus_int len, /* I vector lengths */ + int arch /* I Run-time architecture */ ); + opus_int32 silk_inner_prod_aligned_scale( const opus_int16 *const inVec1, /* I input vector 1 */ const opus_int16 *const inVec2, /* I input vector 2 */ @@ -348,7 +356,7 @@ opus_int32 silk_inner_prod_aligned_scale( const opus_int len /* I vector lengths */ ); -opus_int64 silk_inner_prod16_aligned_64( +opus_int64 silk_inner_prod16_aligned_64_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ @@ -575,6 +583,14 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) /* the following seems faster on x86 */ #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) +#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ + ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) + +#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len)) +#endif + #include "Inlines.h" #include "MacroCount.h" #include "MacroDebug.h" @@ -587,6 +603,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) #include "arm/SigProc_FIX_armv5e.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/sigproc_fix_mipsr1.h" +#endif + + #ifdef __cplusplus } #endif diff --git a/media/libopus/silk/VAD.c b/media/libopus/silk/VAD.c index a809098143..0a782af2f1 100644 --- a/media/libopus/silk/VAD.c +++ b/media/libopus/silk/VAD.c @@ -33,10 +33,12 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" /* Silk VAD noise level estimation */ +# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) static OPUS_INLINE void silk_VAD_GetNoiseLevels( const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ); +#endif /**********************************/ /* Initialization of the Silk VAD */ @@ -77,7 +79,7 @@ static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -120 /***************************************/ /* Get the speech activity level in Q8 */ /***************************************/ -opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ +opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ silk_encoder_state *psEncC, /* I/O Encoder state */ const opus_int16 pIn[] /* I PCM input */ ) @@ -296,7 +298,10 @@ opus_int silk_VAD_GetSA_Q8( /* O Return v /**************************/ /* Noise level estimation */ /**************************/ -static OPUS_INLINE void silk_VAD_GetNoiseLevels( +# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +static OPUS_INLINE +#endif +void silk_VAD_GetNoiseLevels( const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ) diff --git a/media/libopus/silk/VQ_WMat_EC.c b/media/libopus/silk/VQ_WMat_EC.c index 13d5d34edd..7983f1db80 100644 --- a/media/libopus/silk/VQ_WMat_EC.c +++ b/media/libopus/silk/VQ_WMat_EC.c @@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC( +void silk_VQ_WMat_EC_c( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ @@ -55,7 +55,7 @@ void silk_VQ_WMat_EC( *rate_dist_Q14 = silk_int32_MAX; cb_row_Q7 = cb_Q7; for( k = 0; k < L; k++ ) { - gain_tmp_Q7 = cb_gain_Q7[k]; + gain_tmp_Q7 = cb_gain_Q7[k]; diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); @@ -66,8 +66,8 @@ void silk_VQ_WMat_EC( /* Weighted rate */ sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); - /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); + /* Penalty for too large gain */ + sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); silk_assert( sum1_Q14 >= 0 ); @@ -111,7 +111,7 @@ void silk_VQ_WMat_EC( if( sum1_Q14 < *rate_dist_Q14 ) { *rate_dist_Q14 = sum1_Q14; *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; + *gain_Q7 = gain_tmp_Q7; } /* Go to next cbk vector */ diff --git a/media/libopus/silk/code_signs.c b/media/libopus/silk/code_signs.c index 0419ea2626..dfd1dca9a1 100644 --- a/media/libopus/silk/code_signs.c +++ b/media/libopus/silk/code_signs.c @@ -74,7 +74,7 @@ void silk_encode_signs( /* Decodes signs of excitation */ void silk_decode_signs( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* I/O pulse signal */ + opus_int16 pulses[], /* I/O pulse signal */ opus_int length, /* I length of input */ const opus_int signalType, /* I Signal type */ const opus_int quantOffsetType, /* I Quantization offset type */ @@ -83,7 +83,7 @@ void silk_decode_signs( { opus_int i, j, p; opus_uint8 icdf[ 2 ]; - opus_int *q_ptr; + opus_int16 *q_ptr; const opus_uint8 *icdf_ptr; icdf[ 1 ] = 0; diff --git a/media/libopus/silk/control_SNR.c b/media/libopus/silk/control_SNR.c index f04e69fce2..cee87eb0d8 100644 --- a/media/libopus/silk/control_SNR.c +++ b/media/libopus/silk/control_SNR.c @@ -70,11 +70,6 @@ opus_int silk_control_SNR( break; } } - - /* Reduce coding quality whenever LBRR is enabled, to free up some bits */ - if( psEncC->LBRR_enabled ) { - psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) ); - } } return ret; diff --git a/media/libopus/silk/control_codec.c b/media/libopus/silk/control_codec.c index 1f674bddb6..044eea3f2a 100644 --- a/media/libopus/silk/control_codec.c +++ b/media/libopus/silk/control_codec.c @@ -397,9 +397,10 @@ static OPUS_INLINE opus_int silk_setup_LBRR( const opus_int32 TargetRate_bps /* I */ ) { - opus_int ret = SILK_NO_ERROR; + opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR; opus_int32 LBRR_rate_thres_bps; + LBRR_in_previous_packet = psEncC->LBRR_enabled; psEncC->LBRR_enabled = 0; if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) { if( psEncC->fs_kHz == 8 ) { @@ -413,8 +414,13 @@ static OPUS_INLINE opus_int silk_setup_LBRR( if( TargetRate_bps > LBRR_rate_thres_bps ) { /* Set gain increase for coding LBRR excitation */ + if( LBRR_in_previous_packet == 0 ) { + /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ + psEncC->LBRR_GainIncreases = 7; + } else { + psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); + } psEncC->LBRR_enabled = 1; - psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); } } diff --git a/media/libopus/silk/dec_API.c b/media/libopus/silk/dec_API.c index 4cbcf71514..b7d8ed48d8 100644 --- a/media/libopus/silk/dec_API.c +++ b/media/libopus/silk/dec_API.c @@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "API.h" #include "main.h" #include "stack_alloc.h" +#include "os_support.h" /************************/ /* Decoder Super Struct */ @@ -84,13 +85,15 @@ opus_int silk_Decode( /* O Returns error co opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ ec_dec *psRangeDec, /* I/O Compressor data structure */ opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut /* O Number of samples decoded */ + opus_int32 *nSamplesOut, /* O Number of samples decoded */ + int arch /* I Run-time architecture */ ) { opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage ); + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); VARDECL( opus_int16, samplesOut2_tmp ); opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int16 *resample_out_ptr; @@ -98,6 +101,7 @@ opus_int silk_Decode( /* O Returns error co silk_decoder_state *channel_state = psDec->channel_state; opus_int has_side; opus_int stereo_to_mono; + int delay_stack_alloc; SAVE_STACK; silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); @@ -196,7 +200,7 @@ opus_int silk_Decode( /* O Returns error co for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { for( n = 0; n < decControl->nChannelsInternal; n++ ) { if( channel_state[ n ].LBRR_flags[ i ] ) { - opus_int pulses[ MAX_FRAME_LENGTH ]; + opus_int16 pulses[ MAX_FRAME_LENGTH ]; opus_int condCoding; if( decControl->nChannelsInternal == 2 && n == 0 ) { @@ -251,13 +255,22 @@ opus_int silk_Decode( /* O Returns error co psDec->channel_state[ 1 ].first_frame_after_reset = 1; } - ALLOC( samplesOut1_tmp_storage, - decControl->nChannelsInternal*( - channel_state[ 0 ].frame_length + 2 ), + /* Check if the temp buffer fits into the output PCM buffer. If it fits, + we can delay allocating the temp buffer until after the SILK peak stack + usage. We need to use a < and not a <= because of the two extra samples. */ + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal + < decControl->API_sampleRate*decControl->nChannelsAPI; + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), opus_int16 ); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage - + channel_state[ 0 ].frame_length + 2; + if ( delay_stack_alloc ) + { + samplesOut1_tmp[ 0 ] = samplesOut; + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; + } else { + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; + } if( lostFlag == FLAG_DECODE_NORMAL ) { has_side = !decode_only_middle; @@ -284,7 +297,7 @@ opus_int silk_Decode( /* O Returns error co } else { condCoding = CODE_CONDITIONALLY; } - ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); + ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch); } else { silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); } @@ -312,6 +325,15 @@ opus_int silk_Decode( /* O Returns error co resample_out_ptr = samplesOut; } + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) + : ALLOC_NONE, + opus_int16 ); + if ( delay_stack_alloc ) { + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; + } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { /* Resample decoded signal to API_sampleRate */ diff --git a/media/libopus/silk/decode_core.c b/media/libopus/silk/decode_core.c index a820bf11d6..b88991e349 100644 --- a/media/libopus/silk/decode_core.c +++ b/media/libopus/silk/decode_core.c @@ -39,7 +39,8 @@ void silk_decode_core( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I Decoder control */ opus_int16 xq[], /* O Decoded speech */ - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ + const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ + int arch /* I Run-time architecture */ ) { opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; @@ -147,7 +148,7 @@ void silk_decode_core( } silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ], - A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order ); + A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch ); /* After rewhitening the LTP state is unscaled */ if( k == 0 ) { diff --git a/media/libopus/silk/decode_frame.c b/media/libopus/silk/decode_frame.c index abc00a3d54..a605d95ac6 100644 --- a/media/libopus/silk/decode_frame.c +++ b/media/libopus/silk/decode_frame.c @@ -42,18 +42,16 @@ opus_int silk_decode_frame( opus_int16 pOut[], /* O Pointer to output speech frame */ opus_int32 *pN, /* O Pointer to size of output frame */ opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding /* I The type of conditional coding to use */ + opus_int condCoding, /* I The type of conditional coding to use */ + int arch /* I Run-time architecture */ ) { VARDECL( silk_decoder_control, psDecCtrl ); opus_int L, mv_len, ret = 0; - VARDECL( opus_int, pulses ); SAVE_STACK; L = psDec->frame_length; ALLOC( psDecCtrl, 1, silk_decoder_control ); - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int ); psDecCtrl->LTP_scale_Q14 = 0; /* Safety checks */ @@ -62,6 +60,9 @@ opus_int silk_decode_frame( if( lostFlag == FLAG_DECODE_NORMAL || ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) { + VARDECL( opus_int16, pulses ); + ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & + ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); /*********************************************/ /* Decode quantization indices of side info */ /*********************************************/ @@ -81,12 +82,12 @@ opus_int silk_decode_frame( /********************************************************/ /* Run inverse NSQ */ /********************************************************/ - silk_decode_core( psDec, psDecCtrl, pOut, pulses ); + silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); /********************************************************/ /* Update PLC state */ /********************************************************/ - silk_PLC( psDec, psDecCtrl, pOut, 0 ); + silk_PLC( psDec, psDecCtrl, pOut, 0, arch ); psDec->lossCnt = 0; psDec->prevSignalType = psDec->indices.signalType; @@ -96,7 +97,7 @@ opus_int silk_decode_frame( psDec->first_frame_after_reset = 0; } else { /* Handle packet loss by extrapolation */ - silk_PLC( psDec, psDecCtrl, pOut, 1 ); + silk_PLC( psDec, psDecCtrl, pOut, 1, arch ); } /*************************/ @@ -107,16 +108,16 @@ opus_int silk_decode_frame( silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); - /****************************************************************/ - /* Ensure smooth connection of extrapolated and good frames */ - /****************************************************************/ - silk_PLC_glue_frames( psDec, pOut, L ); - /************************************************/ /* Comfort noise generation / estimation */ /************************************************/ silk_CNG( psDec, psDecCtrl, pOut, L ); + /****************************************************************/ + /* Ensure smooth connection of extrapolated and good frames */ + /****************************************************************/ + silk_PLC_glue_frames( psDec, pOut, L ); + /* Update some decoder state variables */ psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; diff --git a/media/libopus/silk/decode_pulses.c b/media/libopus/silk/decode_pulses.c index e8a87c2ab7..d6bbec9225 100644 --- a/media/libopus/silk/decode_pulses.c +++ b/media/libopus/silk/decode_pulses.c @@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. /*********************************************/ void silk_decode_pulses( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* O Excitation signal */ + opus_int16 pulses[], /* O Excitation signal */ const opus_int signalType, /* I Sigtype */ const opus_int quantOffsetType, /* I quantOffsetType */ const opus_int frame_length /* I Frame length */ @@ -44,7 +44,7 @@ void silk_decode_pulses( { opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; - opus_int *pulses_ptr; + opus_int16 *pulses_ptr; const opus_uint8 *cdf_ptr; /*********************/ @@ -69,9 +69,9 @@ void silk_decode_pulses( sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 ); /* LSB indication */ - while( sum_pulses[ i ] == MAX_PULSES + 1 ) { + while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) { nLshifts[ i ]++; - /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */ + /* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */ sum_pulses[ i ] = ec_dec_icdf( psRangeDec, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 ); } @@ -84,7 +84,7 @@ void silk_decode_pulses( if( sum_pulses[ i ] > 0 ) { silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); } else { - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) ); + silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); } } diff --git a/media/libopus/silk/define.h b/media/libopus/silk/define.h index c47aca9f58..19c9b00e25 100644 --- a/media/libopus/silk/define.h +++ b/media/libopus/silk/define.h @@ -169,7 +169,7 @@ extern "C" #define N_RATE_LEVELS 10 /* Maximum sum of pulses per shell coding frame */ -#define MAX_PULSES 16 +#define SILK_MAX_PULSES 16 #define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */ diff --git a/media/libopus/silk/enc_API.c b/media/libopus/silk/enc_API.c index 43739efc24..f8060286db 100644 --- a/media/libopus/silk/enc_API.c +++ b/media/libopus/silk/enc_API.c @@ -165,7 +165,7 @@ opus_int silk_Encode( /* O Returns error co psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; /* Check values in encoder control structure */ - if( ( ret = check_control_input( encControl ) != 0 ) ) { + if( ( ret = check_control_input( encControl ) ) != 0 ) { silk_assert( 0 ); RESTORE_STACK; return ret; @@ -376,26 +376,33 @@ opus_int silk_Encode( /* O Returns error co for( n = 0; n < encControl->nChannelsInternal; n++ ) { silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); } + + psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); } silk_HP_variable_cutoff( psEnc->state_Fxx ); /* Total target bits for packet */ nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); - /* Subtract half of the bits already used */ + /* Subtract bits used for LBRR */ if( !prefillFlag ) { - nBits -= ec_tell( psRangeEnc ) >> 1; + nBits -= psEnc->nBitsUsedLBRR; } /* Divide by number of uncoded frames left in packet */ - nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ); + nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); /* Convert to bits/second */ if( encControl->payloadSize_ms == 10 ) { TargetRate_bps = silk_SMULBB( nBits, 100 ); } else { TargetRate_bps = silk_SMULBB( nBits, 50 ); } - /* Subtract fraction of bits in excess of target in previous packets */ + /* Subtract fraction of bits in excess of target in previous frames and packets */ TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); + if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { + /* Compare actual vs target bits so far in this packet */ + opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; + TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); + } /* Never exceed input bitrate */ TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); diff --git a/media/libopus/silk/encode_pulses.c b/media/libopus/silk/encode_pulses.c index a4501438d1..ab00264f99 100644 --- a/media/libopus/silk/encode_pulses.c +++ b/media/libopus/silk/encode_pulses.c @@ -142,7 +142,7 @@ void silk_encode_pulses( sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ]; for( i = 0; i < iter; i++ ) { if( nRshifts[ i ] > 0 ) { - sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ]; + sumBits_Q5 += nBits_ptr[ SILK_MAX_PULSES + 1 ]; } else { sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ]; } @@ -162,9 +162,9 @@ void silk_encode_pulses( if( nRshifts[ i ] == 0 ) { ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 ); } else { - ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 ); + ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, cdf_ptr, 8 ); for( k = 0; k < nRshifts[ i ] - 1; k++ ) { - ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); + ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); } ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); } diff --git a/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c b/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c index a94190808e..5574e7069f 100644 --- a/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c +++ b/media/libopus/silk/fixed/LTP_analysis_filter_FIX.c @@ -45,7 +45,7 @@ void silk_LTP_analysis_filter_FIX( const opus_int16 *x_ptr, *x_lag_ptr; opus_int16 Btmp_Q14[ LTP_ORDER ]; opus_int16 *LTP_res_ptr; - opus_int k, i, j; + opus_int k, i; opus_int32 LTP_est; x_ptr = x; @@ -53,9 +53,12 @@ void silk_LTP_analysis_filter_FIX( for( k = 0; k < nb_subfr; k++ ) { x_lag_ptr = x_ptr - pitchL[ k ]; - for( i = 0; i < LTP_ORDER; i++ ) { - Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ]; - } + + Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ]; + Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ]; + Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ]; + Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ]; + Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ]; /* LTP analysis FIR filter */ for( i = 0; i < subfr_length + pre_length; i++ ) { @@ -63,9 +66,11 @@ void silk_LTP_analysis_filter_FIX( /* Long-term prediction */ LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] ); - for( j = 1; j < LTP_ORDER; j++ ) { - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] ); - } + LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] ); + LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] ); + LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] ); + LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] ); + LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/ /* Subtract long-term prediction */ diff --git a/media/libopus/silk/fixed/burg_modified_FIX.c b/media/libopus/silk/fixed/burg_modified_FIX.c index db348295bf..4878553b65 100644 --- a/media/libopus/silk/fixed/burg_modified_FIX.c +++ b/media/libopus/silk/fixed/burg_modified_FIX.c @@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. #define MAX_RSHIFTS (32 - QA) /* Compute reflection coefficients from input signal */ -void silk_burg_modified( +void silk_burg_modified_c( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ @@ -54,7 +54,7 @@ void silk_burg_modified( int arch /* I Run-time architecture */ ) { - opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; + opus_int k, n, s, lz, rshifts, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; @@ -63,27 +63,23 @@ void silk_burg_modified( opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; + opus_int64 C0_64; silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ - silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); - if( rshifts > MAX_RSHIFTS ) { - C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); - silk_assert( C0 > 0 ); - rshifts = MAX_RSHIFTS; + C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); + lz = silk_CLZ64(C0_64); + rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; + if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; + if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; + + if (rshifts > 0) { + C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); } else { - lz = silk_CLZ32( C0 ) - 1; - rshifts_extra = N_BITS_HEAD_ROOM - lz; - if( rshifts_extra > 0 ) { - rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); - C0 = silk_RSHIFT32( C0, rshifts_extra ); - } else { - rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); - C0 = silk_LSHIFT32( C0, -rshifts_extra ); - } - rshifts += rshifts_extra; + C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts ); } + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); if( rshifts > 0 ) { @@ -91,7 +87,7 @@ void silk_burg_modified( x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); + silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); } } } else { @@ -252,12 +248,12 @@ void silk_burg_modified( if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts ); + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts ); + C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts); } } /* Approximate residual energy */ diff --git a/media/libopus/silk/fixed/corrMatrix_FIX.c b/media/libopus/silk/fixed/corrMatrix_FIX.c index c617270536..c1d437c785 100644 --- a/media/libopus/silk/fixed/corrMatrix_FIX.c +++ b/media/libopus/silk/fixed/corrMatrix_FIX.c @@ -42,7 +42,8 @@ void silk_corrVector_FIX( const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts /* I Right shifts of correlations */ + const opus_int rshifts, /* I Right shifts of correlations */ + int arch /* I Run-time architecture */ ) { opus_int lag, i; @@ -65,7 +66,7 @@ void silk_corrVector_FIX( } else { silk_assert( rshifts == 0 ); for( lag = 0; lag < order; lag++ ) { - Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */ + Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */ ptr1--; /* Go to next column of X */ } } @@ -78,7 +79,8 @@ void silk_corrMatrix_FIX( const opus_int order, /* I Max lag for correlation */ const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts /* I/O Right shifts of correlations */ + opus_int *rshifts, /* I/O Right shifts of correlations */ + int arch /* I Run-time architecture */ ) { opus_int i, j, lag, rshifts_local, head_room_rshifts; @@ -138,7 +140,7 @@ void silk_corrMatrix_FIX( } else { for( lag = 1; lag < order; lag++ ) { /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ - energy = silk_inner_prod_aligned( ptr1, ptr2, L ); + energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); matrix_ptr( XX, lag, 0, order ) = energy; matrix_ptr( XX, 0, lag, order ) = energy; /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ diff --git a/media/libopus/silk/fixed/encode_frame_FIX.c b/media/libopus/silk/fixed/encode_frame_FIX.c index b490986b97..5ef44b03fc 100644 --- a/media/libopus/silk/fixed/encode_frame_FIX.c +++ b/media/libopus/silk/fixed/encode_frame_FIX.c @@ -48,7 +48,7 @@ void silk_encode_do_VAD_FIX( /****************************/ /* Voice Activity Detection */ /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); + silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); /**************************************************/ /* Convert speech activity into VAD and DTX flags */ @@ -196,11 +196,13 @@ opus_int silk_encode_frame_FIX( if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 ); + sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, + psEnc->sCmn.arch ); } else { silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 ); + sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, + psEnc->sCmn.arch); } /****************************************/ @@ -287,7 +289,7 @@ opus_int silk_encode_frame_FIX( for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); } - + /* Quantize gains */ psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16, @@ -371,12 +373,12 @@ static OPUS_INLINE void silk_LBRR_encode_FIX( silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 ); + psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } else { silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 ); + psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } /* Restore original gains */ diff --git a/media/libopus/silk/fixed/find_LPC_FIX.c b/media/libopus/silk/fixed/find_LPC_FIX.c index 783d32e20f..e11cdc86e6 100644 --- a/media/libopus/silk/fixed/find_LPC_FIX.c +++ b/media/libopus/silk/fixed/find_LPC_FIX.c @@ -95,7 +95,7 @@ void silk_find_LPC_FIX( silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); /* Calculate residual energy with NLSF interpolation */ - silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder ); + silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch ); silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ); diff --git a/media/libopus/silk/fixed/find_LTP_FIX.c b/media/libopus/silk/fixed/find_LTP_FIX.c index 8c4d70376c..1314a28137 100644 --- a/media/libopus/silk/fixed/find_LTP_FIX.c +++ b/media/libopus/silk/fixed/find_LTP_FIX.c @@ -50,7 +50,8 @@ void silk_find_LTP_FIX( const opus_int subfr_length, /* I subframe length */ const opus_int nb_subfr, /* I number of subframes */ const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ + int arch /* I Run-time architecture */ ) { opus_int i, k, lshift; @@ -84,10 +85,10 @@ void silk_find_LTP_FIX( rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); } corr_rshifts[ k ] = rr_shifts; - silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ + silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ - silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ + silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ if( corr_rshifts[ k ] > rr_shifts ) { rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ } diff --git a/media/libopus/silk/fixed/find_pitch_lags_FIX.c b/media/libopus/silk/fixed/find_pitch_lags_FIX.c index 620f8dcd2c..b8440a8247 100644 --- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c +++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c @@ -112,7 +112,7 @@ void silk_find_pitch_lags_FIX( /*****************************************/ /* LPC analysis filtering */ /*****************************************/ - silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder ); + silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { /* Threshold for pitch estimator */ diff --git a/media/libopus/silk/fixed/find_pred_coefs_FIX.c b/media/libopus/silk/fixed/find_pred_coefs_FIX.c index 5c22f8288b..d308e9cf5f 100644 --- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c +++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c @@ -89,11 +89,12 @@ void silk_find_pred_coefs_FIX( /* LTP analysis */ silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, - psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift ); + psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch); /* Control LTP scaling */ silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); @@ -118,16 +119,16 @@ void silk_find_pred_coefs_FIX( silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); psEncCtrl->LTPredCodGain_Q7 = 0; - psEnc->sCmn.sum_log_gain_Q7 = 0; + psEnc->sCmn.sum_log_gain_Q7 = 0; } /* Limit on total predictive coding gain */ if( psEnc->sCmn.first_frame_after_reset ) { minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 ); - } else { + } else { minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */ - minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, - silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), + minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, + silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 ); } @@ -139,7 +140,7 @@ void silk_find_pred_coefs_FIX( /* Calculate residual energy using quantized LPC coefficients */ silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains, - psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); + psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch ); /* Copy to prediction struct for use in next frame for interpolation */ silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); diff --git a/media/libopus/silk/fixed/main_FIX.h b/media/libopus/silk/fixed/main_FIX.h index a56ca07a22..375b5eb32e 100644 --- a/media/libopus/silk/fixed/main_FIX.h +++ b/media/libopus/silk/fixed/main_FIX.h @@ -97,6 +97,17 @@ void silk_prefilter_FIX( const opus_int16 x[] /* I Speech signal */ ); +void silk_warped_LPC_analysis_filter_FIX_c( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + + /**************************/ /* Noise shaping analysis */ /**************************/ @@ -166,7 +177,8 @@ void silk_find_LTP_FIX( const opus_int subfr_length, /* I subframe length */ const opus_int nb_subfr, /* I number of subframes */ const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ + int arch /* I Run-time architecture */ ); void silk_LTP_analysis_filter_FIX( @@ -190,7 +202,8 @@ void silk_residual_energy_FIX( const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ const opus_int subfr_length, /* I Subframe length */ const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order /* I LPC order */ + const opus_int LPC_order, /* I LPC order */ + int arch /* I Run-time architecture */ ); /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ @@ -220,7 +233,8 @@ void silk_corrMatrix_FIX( const opus_int order, /* I Max lag for correlation */ const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts /* I/O Right shifts of correlations */ + opus_int *rshifts, /* I/O Right shifts of correlations */ + int arch /* I Run-time architecture */ ); /* Calculates correlation vector X'*t */ @@ -230,7 +244,8 @@ void silk_corrVector_FIX( const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts /* I Right shifts of correlations */ + const opus_int rshifts, /* I Right shifts of correlations */ + int arch /* I Run-time architecture */ ); /* Add noise to matrix diagonal */ diff --git a/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h new file mode 100644 index 0000000000..c30481e437 --- /dev/null +++ b/media/libopus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h @@ -0,0 +1,336 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + + +/**************************************************************/ +/* Compute noise shaping coefficients and initial gain values */ +/**************************************************************/ +#define OVERRIDE_silk_noise_shape_analysis_FIX + +void silk_noise_shape_analysis_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ + const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ +) +{ + silk_shape_state_FIX *psShapeSt = &psEnc->sShape; + opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; + opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; + opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; + opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; + opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; + VARDECL( opus_int16, x_windowed ); + const opus_int16 *x_ptr, *pitch_res_ptr; + SAVE_STACK; + + /* Point to start of first LPC analysis block */ + x_ptr = x - psEnc->sCmn.la_shape; + + /****************/ + /* GAIN CONTROL */ + /****************/ + SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; + + /* Input quality is the average of the quality in the lowest two VAD bands */ + psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] + + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); + + /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ + psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - + SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); + + /* Reduce coding SNR during low speech activity */ + if( psEnc->sCmn.useCBR == 0 ) { + b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; + b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, + silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ + silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ + } + + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Reduce gains for periodic signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); + } else { + /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, + silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), + SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); + } + + /*************************/ + /* SPARSENESS PROCESSING */ + /*************************/ + /* Set quantizer offset */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Initially set to 0; may be overruled in process_gains(..) */ + psEnc->sCmn.indices.quantOffsetType = 0; + psEncCtrl->sparseness_Q8 = 0; + } else { + /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ + nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); + energy_variation_Q7 = 0; + log_energy_prev_Q7 = 0; + pitch_res_ptr = pitch_res; + for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { + silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); + nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ + + log_energy_Q7 = silk_lin2log( nrg ); + if( k > 0 ) { + energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); + } + log_energy_prev_Q7 = log_energy_Q7; + pitch_res_ptr += nSamples; + } + + psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - + SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); + + /* Set quantization offset depending on sparseness measure */ + if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { + psEnc->sCmn.indices.quantOffsetType = 0; + } else { + psEnc->sCmn.indices.quantOffsetType = 1; + } + + /* Increase coding SNR for sparse signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); + } + + /*******************************/ + /* Control bandwidth expansion */ + /*******************************/ + /* More BWE for signals with high prediction gain */ + strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); + BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), + silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); + delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), + SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); + BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); + BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); + /* BWExp1 will be applied after BWExp2, so make it relative */ + BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ + warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); + } else { + warping_Q16 = 0; + } + + /********************************************/ + /* Compute noise shaping AR coefs and gains */ + /********************************************/ + ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Apply window: sine slope followed by flat part followed by cosine slope */ + opus_int shift, slope_part, flat_part; + flat_part = psEnc->sCmn.fs_kHz * 3; + slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); + + silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); + shift = slope_part; + silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); + shift += flat_part; + silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); + + /* Update pointer: next LPC analysis block */ + x_ptr += psEnc->sCmn.subfr_length; + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Calculate warped auto correlation */ + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); + } else { + /* Calculate regular auto correlation */ + silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); + } + + /* Add white noise, as a fraction of energy */ + auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), + SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); + + /* Calculate the reflection coefficients using schur */ + nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); + silk_assert( nrg >= 0 ); + + /* Convert reflection coefficients to prediction coefficients */ + silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); + + Qnrg = -scale; /* range: -12...30*/ + silk_assert( Qnrg >= -12 ); + silk_assert( Qnrg <= 30 ); + + /* Make sure that Qnrg is an even number */ + if( Qnrg & 1 ) { + Qnrg -= 1; + nrg >>= 1; + } + + tmp32 = silk_SQRT_APPROX( nrg ); + Qnrg >>= 1; /* range: -6...15*/ + + psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \ + silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) )); + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Adjust gain for warping */ + gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { + psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; + } else { + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + } + } + + /* Bandwidth expansion for synthesis filter shaping */ + silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); + + /* Compute noise shaping filter coefficients */ + silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); + + /* Bandwidth expansion for analysis filter shaping */ + silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); + silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); + + /* Ratio of prediction gains, in energy domain */ + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); + + /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ + pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); + psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); + + /* Convert to monic warped prediction coefficients and limit absolute values */ + limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + + /* Convert from Q24 to Q13 and store in int16 */ + for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { + psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); + psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); + } + } + + /*****************/ + /* Gain tweaking */ + /*****************/ + /* Increase gains during low speech activity and put lower limit on gains */ + gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); + gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); + silk_assert( gain_mult_Q16 > 0 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); + } + + gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), + psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); + } + + /************************************************/ + /* Control low-frequency shaping and noise tilt */ + /************************************************/ + /* Less low frequency shaping for noisy inputs */ + strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), + SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); + strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ + /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ + opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); + /* Pack two coefficients in one int32 */ + psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); + psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); + } + silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ + Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - + silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), + silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); + } else { + b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ + /* Pack two coefficients in one int32 */ + psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - + silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); + psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); + for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; + } + Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); + } + + /****************************/ + /* HARMONIC SHAPING CONTROL */ + /****************************/ + /* Control boosting of harmonic frequencies */ + HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), + psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); + + /* More harmonic boost for noisy input signals */ + HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, + SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); + + if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* More harmonic noise shaping for high bitrates or noisy input */ + HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), + SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), + psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); + + /* Less harmonic noise shaping for less periodic signals */ + HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), + silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); + } else { + HarmShapeGain_Q16 = 0; + } + + /*************************/ + /* Smooth over subframes */ + /*************************/ + for( k = 0; k < MAX_NB_SUBFR; k++ ) { + psShapeSt->HarmBoost_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psShapeSt->HarmShapeGain_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psShapeSt->Tilt_smth_Q16 = + silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + + psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); + psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); + psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); + } + RESTORE_STACK; +} diff --git a/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h new file mode 100644 index 0000000000..21b256885f --- /dev/null +++ b/media/libopus/silk/fixed/mips/prefilter_FIX_mipsr1.h @@ -0,0 +1,184 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ +#ifndef __PREFILTER_FIX_MIPSR1_H__ +#define __PREFILTER_FIX_MIPSR1_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX +void silk_warped_LPC_analysis_filter_FIX( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order, /* I Filter order (even) */ + int arch +) +{ + opus_int n, i; + opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4; + opus_int32 state_cur, state_next; + + (void)arch; + + /* Order must be even */ + /* Length must be even */ + + silk_assert( ( order & 1 ) == 0 ); + silk_assert( ( length & 1 ) == 0 ); + + for( n = 0; n < length; n+=2 ) { + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); + state_cur = silk_LSHIFT( input[ n ], 14 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); + state_next = tmp2; + acc_Q11 = silk_RSHIFT( order, 1 ); + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); + + + /* Output of lowpass section */ + tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 ); + state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 ); + /* Output of allpass section */ + tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); + state[ 1 ] = tmp4; + acc_Q22 = silk_RSHIFT( order, 1 ); + acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] ); + + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); + state_cur = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); + state_next = tmp2; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); + + + /* Output of allpass section */ + tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 ); + state[ i ] = tmp3; + acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] ); + /* Output of allpass section */ + tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); + state[ i + 1 ] = tmp4; + acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] ); + } + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); + res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); + + state[ order ] = tmp3; + acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] ); + res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 ); + } +} + + + +/* Prefilter for finding Quantizer input signal */ +#define OVERRIDE_silk_prefilt_FIX +static inline void silk_prefilt_FIX( + silk_prefilter_state_FIX *P, /* I/O state */ + opus_int32 st_res_Q12[], /* I short term residual signal */ + opus_int32 xw_Q3[], /* O prefiltered signal */ + opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ + opus_int Tilt_Q14, /* I Tilt shaping coeficient */ + opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ + opus_int lag, /* I Lag for harmonic shaping */ + opus_int length /* I Length of signals */ +) +{ + opus_int i, idx, LTP_shp_buf_idx; + opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; + opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; + opus_int16 *LTP_shp_buf; + + /* To speed up use temp variables instead of using the struct */ + LTP_shp_buf = P->sLTP_shp; + LTP_shp_buf_idx = P->sLTP_shp_buf_idx; + sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; + sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; + + if( lag > 0 ) { + for( i = 0; i < length; i++ ) { + /* unrolled loop */ + silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); + idx = lag + LTP_shp_buf_idx; + n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + + n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); + n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); + + sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); + sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); + + xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); + } + } + else + { + for( i = 0; i < length; i++ ) { + + n_LTP_Q12 = 0; + + n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); + n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); + + sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); + sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); + + xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 ); + } + } + + /* Copy temp variable back to state */ + P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; + P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; + P->sLTP_shp_buf_idx = LTP_shp_buf_idx; +} + +#endif /* __PREFILTER_FIX_MIPSR1_H__ */ diff --git a/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h new file mode 100644 index 0000000000..e803ef0fce --- /dev/null +++ b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -0,0 +1,165 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ +#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" + +#undef QC +#define QC 10 + +#undef QS +#define QS 14 + +/* Autocorrelations for a warped frequency axis */ +#define OVERRIDE_silk_warped_autocorrelation_FIX +void silk_warped_autocorrelation_FIX( + opus_int32 *corr, /* O Result [order + 1] */ + opus_int *scale, /* O Scaling of the correlation vector */ + const opus_int16 *input, /* I Input data to correlate */ + const opus_int warping_Q16, /* I Warping coefficient */ + const opus_int length, /* I Length of input */ + const opus_int order /* I Correlation order (even) */ +) +{ + opus_int n, i, lsh; + opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0; + opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + opus_int64 temp64; + + opus_int32 val; + val = 2 * QS - QC; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + silk_assert( 2 * QS - QC >= 0 ); + + /* Loop over samples */ + for( n = 0; n < length; n=n+4 ) { + + tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); + start_1 = tmp1_QS; + tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS ); + start_2 = tmp3_QS; + tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS ); + start_3 = tmp5_QS; + tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS ); + + /* Loop over allpass sections */ + for( i = 0; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); + corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1); + + tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 ); + corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2); + + tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 ); + corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3); + + tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 ); + state_QS[ i ] = tmp7_QS; + corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]); + + /* Output of allpass section */ + tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); + corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1); + + tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 ); + corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2); + + tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 ); + corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3); + + tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 ); + state_QS[ i + 1 ] = tmp8_QS; + corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); + + } + state_QS[ order ] = tmp7_QS; + + corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1); + corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2); + corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3); + corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); + } + + for(;n< length; n++ ) { + + tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); + + /* Loop over allpass sections */ + for( i = 0; i < order; i += 2 ) { + + /* Output of allpass section */ + tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); + state_QS[ i ] = tmp1_QS; + corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); + + /* Output of allpass section */ + tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); + state_QS[ i + 1 ] = tmp2_QS; + corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); + } + state_QS[ order ] = tmp1_QS; + corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); + } + + temp64 = corr_QC[ 0 ]; + temp64 = __builtin_mips_shilo(temp64, val); + + lsh = silk_CLZ64( temp64 ) - 35; + lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); + *scale = -( QC + lsh ); + silk_assert( *scale >= -30 && *scale <= 12 ); + if( lsh >= 0 ) { + for( i = 0; i < order + 1; i++ ) { + temp64 = corr_QC[ i ]; + //temp64 = __builtin_mips_shilo(temp64, val); + temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + } + } else { + for( i = 0; i < order + 1; i++ ) { + temp64 = corr_QC[ i ]; + //temp64 = __builtin_mips_shilo(temp64, val); + temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + } + } + + corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val); + + silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ +} +#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */ diff --git a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c index e24d2e9d33..22a89f75ae 100644 --- a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c +++ b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c @@ -138,9 +138,14 @@ static OPUS_INLINE void limit_warped_coefs( silk_assert( 0 ); } +#if defined(MIPSr1_ASM) +#include "mips/noise_shape_analysis_FIX_mipsr1.h" +#endif + /**************************************************************/ /* Compute noise shaping coefficients and initial gain values */ /**************************************************************/ +#ifndef OVERRIDE_silk_noise_shape_analysis_FIX void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ @@ -443,3 +448,4 @@ void silk_noise_shape_analysis_FIX( } RESTORE_STACK; } +#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */ diff --git a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c index 1641a0fbcd..01bb9fc0a8 100644 --- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c +++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c @@ -72,7 +72,8 @@ static void silk_P_Ana_calc_energy_st3( opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of one 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ); /*************************************************************/ @@ -195,8 +196,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Calculate first vector products before loop */ cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; - normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ); - normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) ); + normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ); + normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) ); normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = @@ -334,7 +335,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( target_ptr >= frame_8kHz ); silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 ); + energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); for( j = 0; j < length_d_comp; j++ ) { d = d_comp[ j ]; basis_ptr = target_ptr - d; @@ -343,9 +344,9 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( basis_ptr >= frame_8kHz ); silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ ); + cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); if( cross_corr > 0 ) { - energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ); + energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = (opus_int16)silk_DIV32_varQ( cross_corr, silk_ADD32( energy_target, @@ -519,14 +520,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); + silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); lag_counter = 0; silk_assert( lag == silk_SAT16( lag ) ); contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 ); + energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); for( d = start_lag; d <= end_lag; d++ ) { for( j = 0; j < nb_cbk_search; j++ ) { cross_corr = 0; @@ -671,7 +672,8 @@ static void silk_P_Ana_calc_energy_st3( opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of one 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ) { const opus_int16 *target_ptr, *basis_ptr; @@ -705,7 +707,7 @@ static void silk_P_Ana_calc_energy_st3( /* Calculate the energy for first lag */ basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); - energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length ); + energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch ); silk_assert( energy >= 0 ); scratch_mem[ lag_counter ] = energy; lag_counter++; diff --git a/media/libopus/silk/fixed/prefilter_FIX.c b/media/libopus/silk/fixed/prefilter_FIX.c index d381730c28..6a8e35152e 100644 --- a/media/libopus/silk/fixed/prefilter_FIX.c +++ b/media/libopus/silk/fixed/prefilter_FIX.c @@ -33,6 +33,16 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "tuning_parameters.h" +#if defined(MIPSr1_ASM) +#include "mips/prefilter_FIX_mipsr1.h" +#endif + + +#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) +#endif + /* Prefilter for finding Quantizer input signal */ static OPUS_INLINE void silk_prefilt_FIX( silk_prefilter_state_FIX *P, /* I/O state */ @@ -45,7 +55,7 @@ static OPUS_INLINE void silk_prefilt_FIX( opus_int length /* I Length of signals */ ); -void silk_warped_LPC_analysis_filter_FIX( +void silk_warped_LPC_analysis_filter_FIX_c( opus_int32 state[], /* I/O State [order + 1] */ opus_int32 res_Q2[], /* O Residual signal [length] */ const opus_int16 coef_Q13[], /* I Coefficients [order] */ @@ -130,7 +140,7 @@ void silk_prefilter_FIX( /* Short term FIR filtering*/ silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px, - psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder ); + psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch ); /* Reduce (mainly) low frequencies during harmonic emphasis */ B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); @@ -155,6 +165,7 @@ void silk_prefilter_FIX( RESTORE_STACK; } +#ifndef OVERRIDE_silk_prefilt_FIX /* Prefilter for finding Quantizer input signal */ static OPUS_INLINE void silk_prefilt_FIX( silk_prefilter_state_FIX *P, /* I/O state */ @@ -207,3 +218,4 @@ static OPUS_INLINE void silk_prefilt_FIX( P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; P->sLTP_shp_buf_idx = LTP_shp_buf_idx; } +#endif /* OVERRIDE_silk_prefilt_FIX */ diff --git a/media/libopus/silk/fixed/residual_energy_FIX.c b/media/libopus/silk/fixed/residual_energy_FIX.c index 105ae31807..41f74778e8 100644 --- a/media/libopus/silk/fixed/residual_energy_FIX.c +++ b/media/libopus/silk/fixed/residual_energy_FIX.c @@ -42,7 +42,8 @@ void silk_residual_energy_FIX( const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ const opus_int subfr_length, /* I Subframe length */ const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order /* I LPC order */ + const opus_int LPC_order, /* I LPC order */ + int arch /* I Run-time architecture */ ) { opus_int offset, i, j, rshift, lz1, lz2; @@ -60,7 +61,7 @@ void silk_residual_energy_FIX( silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); for( i = 0; i < nb_subfr >> 1; i++ ) { /* Calculate half frame LPC residual signal including preceding samples */ - silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order ); + silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch ); /* Point to first subframe of the just calculated LPC residual signal */ LPC_res_ptr = LPC_res + LPC_order; diff --git a/media/libopus/silk/fixed/structs_FIX.h b/media/libopus/silk/fixed/structs_FIX.h index 244b479344..3294b25128 100644 --- a/media/libopus/silk/fixed/structs_FIX.h +++ b/media/libopus/silk/fixed/structs_FIX.h @@ -116,6 +116,7 @@ typedef struct { typedef struct { silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ]; stereo_enc_state sStereo; + opus_int32 nBitsUsedLBRR; opus_int32 nBitsExceeded; opus_int nChannelsAPI; opus_int nChannelsInternal; diff --git a/media/libopus/silk/fixed/vector_ops_FIX.c b/media/libopus/silk/fixed/vector_ops_FIX.c index 509c8b35a1..d94980014f 100644 --- a/media/libopus/silk/fixed/vector_ops_FIX.c +++ b/media/libopus/silk/fixed/vector_ops_FIX.c @@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE. #endif #include "SigProc_FIX.h" +#include "pitch.h" /* Copy and multiply a vector by a constant */ void silk_scale_copy_vector16( @@ -70,18 +71,23 @@ void silk_scale_vector32_Q26_lshift_18( opus_int32 silk_inner_prod_aligned( const opus_int16 *const inVec1, /* I input vector 1 */ const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ + const opus_int len, /* I vector lengths */ + int arch /* I Run-time architecture */ ) { +#ifdef FIXED_POINT + return celt_inner_prod(inVec1, inVec2, len, arch); +#else opus_int i; opus_int32 sum = 0; for( i = 0; i < len; i++ ) { sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); } return sum; +#endif } -opus_int64 silk_inner_prod16_aligned_64( +opus_int64 silk_inner_prod16_aligned_64_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ diff --git a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c index a4a579b10d..6ca6c1184d 100644 --- a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c +++ b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c @@ -34,6 +34,12 @@ POSSIBILITY OF SUCH DAMAGE. #define QC 10 #define QS 14 +#if defined(MIPSr1_ASM) +#include "mips/warped_autocorrelation_FIX_mipsr1.h" +#endif + + +#ifndef OVERRIDE_silk_warped_autocorrelation_FIX /* Autocorrelations for a warped frequency axis */ void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ @@ -86,3 +92,4 @@ void silk_warped_autocorrelation_FIX( } silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */ diff --git a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c new file mode 100644 index 0000000000..3756095fbe --- /dev/null +++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c @@ -0,0 +1,375 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "SigProc_FIX.h" +#include "define.h" +#include "tuning_parameters.h" +#include "pitch.h" +#include "celt/x86/x86cpu.h" + +#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ + +#define QA 25 +#define N_BITS_HEAD_ROOM 2 +#define MIN_RSHIFTS -16 +#define MAX_RSHIFTS (32 - QA) + +/* Compute reflection coefficients from input signal */ +void silk_burg_modified_sse4_1( + opus_int32 *res_nrg, /* O Residual energy */ + opus_int *res_nrg_Q, /* O Residual energy Q value */ + opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ + const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ + const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ + const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I Number of subframes stacked in x */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ +) +{ + opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; + opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; + const opus_int16 *x_ptr; + opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; + opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; + opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; + opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; + opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; + opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; + + __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; + __m128i CONST1 = _mm_set1_epi32(1); + + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + + /* Compute autocorrelations, added over subframes */ + silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); + if( rshifts > MAX_RSHIFTS ) { + C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); + silk_assert( C0 > 0 ); + rshifts = MAX_RSHIFTS; + } else { + lz = silk_CLZ32( C0 ) - 1; + rshifts_extra = N_BITS_HEAD_ROOM - lz; + if( rshifts_extra > 0 ) { + rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); + C0 = silk_RSHIFT32( C0, rshifts_extra ); + } else { + rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); + C0 = silk_LSHIFT32( C0, -rshifts_extra ); + } + rshifts += rshifts_extra; + } + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ + silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); + if( rshifts > 0 ) { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + for( n = 1; n < D + 1; n++ ) { + C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( + silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); + } + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + int i; + opus_int32 d; + x_ptr = x + s * subfr_length; + celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); + for( n = 1; n < D + 1; n++ ) { + for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) + d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); + xcorr[ n - 1 ] += d; + } + for( n = 1; n < D + 1; n++ ) { + C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); + } + } + } + silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); + + /* Initialize */ + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ + + invGain_Q30 = (opus_int32)1 << 30; + reached_max_gain = 0; + for( n = 0; n < D; n++ ) { + /* Update first row of correlation matrix (without first element) */ + /* Update last row of correlation matrix (without last element, stored in reversed order) */ + /* Update C * Af */ + /* Update C * flipud(Af) (stored in reversed order) */ + if( rshifts > -2 ) { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ + x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ + tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ + tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ + for( k = 0; k < n; k++ ) { + C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ + C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ + Atmp_QA = Af_QA[ k ]; + tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ + tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ + } + tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ + tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ + for( k = 0; k <= n; k++ ) { + CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ + CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ + } + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ + x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ + tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ + tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ + + X1_3210 = _mm_set1_epi32( x1 ); + X2_3210 = _mm_set1_epi32( x2 ); + TMP1_3210 = _mm_setzero_si128(); + TMP2_3210 = _mm_setzero_si128(); + for( k = 0; k < n - 3; k += 4 ) { + PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] ); + SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] ); + FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] ); + PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] ); + ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] ); + + T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 ); + T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 ); + + ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 ); + ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 ); + ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 ); + + FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 ); + LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 ); + + PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 ); + SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 ); + + _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 ); + _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 ); + + TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 ); + TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 ); + } + + TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) ); + TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) ); + TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) ); + TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) ); + + tmp1 += _mm_cvtsi128_si32( TMP1_3210 ); + tmp2 += _mm_cvtsi128_si32( TMP2_3210 ); + + for( ; k < n; k++ ) { + C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ + C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ + Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ + tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ + tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ + } + + tmp1 = -tmp1; /* Q17 */ + tmp2 = -tmp2; /* Q17 */ + + { + __m128i xmm_tmp1, xmm_tmp2; + __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; + __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; + + xmm_tmp1 = _mm_set1_epi32( tmp1 ); + xmm_tmp2 = _mm_set1_epi32( tmp2 ); + + for( k = 0; k <= n - 3; k += 4 ) { + xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] ); + xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] ); + + xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + + xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 ); + xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 ); + + /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ + xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 ); + xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 ); + xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 ); + xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 ); + + xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 ); + xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 ); + xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 ); + xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 ); + + xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC ); + xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC ); + + X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); + PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] ); + + X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 ); + PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 ); + + _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 ); + _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 ); + } + + for( ; k <= n; k++ ) { + CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, + silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ + CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, + silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ + } + } + } + } + + /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ + tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ + tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ + num = 0; /* Q( -rshifts ) */ + nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ + for( k = 0; k < n; k++ ) { + Atmp_QA = Af_QA[ k ]; + lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; + lz = silk_min( 32 - QA, lz ); + Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ + + tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), + Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ + } + CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ + CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ + num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ + num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ + + /* Calculate the next order reflection (parcor) coefficient */ + if( silk_abs( num ) < nrg ) { + rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); + } else { + rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; + } + + /* Update inverse prediction gain */ + tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); + tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); + if( tmp1 <= minInvGain_Q30 ) { + /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ + tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ + rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ + /* Newton-Raphson iteration */ + rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ + rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ + if( num < 0 ) { + /* Ensure adjusted reflection coefficients has the original sign */ + rc_Q31 = -rc_Q31; + } + invGain_Q30 = minInvGain_Q30; + reached_max_gain = 1; + } else { + invGain_Q30 = tmp1; + } + + /* Update the AR coefficients */ + for( k = 0; k < (n + 1) >> 1; k++ ) { + tmp1 = Af_QA[ k ]; /* QA */ + tmp2 = Af_QA[ n - k - 1 ]; /* QA */ + Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ + Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ + } + Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ + + if( reached_max_gain ) { + /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ + for( k = n + 1; k < D; k++ ) { + Af_QA[ k ] = 0; + } + break; + } + + /* Update C * Af and C * Ab */ + for( k = 0; k <= n + 1; k++ ) { + tmp1 = CAf[ k ]; /* Q( -rshifts ) */ + tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ + CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ + CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ + } + } + + if( reached_max_gain ) { + for( k = 0; k < D; k++ ) { + /* Scale coefficients */ + A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); + } + /* Subtract energy of preceding samples from C0 */ + if( rshifts > 0 ) { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts ); + } + } + /* Approximate residual energy */ + *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); + *res_nrg_Q = -rshifts; + } else { + /* Return residual energy */ + nrg = CAf[ 0 ]; /* Q( -rshifts ) */ + tmp1 = (opus_int32)1 << 16; /* Q16 */ + for( k = 0; k < D; k++ ) { + Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ + nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ + tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ + A_Q16[ k ] = -Atmp1; + } + *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ + *res_nrg_Q = -rshifts; + } +} diff --git a/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c b/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c new file mode 100644 index 0000000000..488a603f5d --- /dev/null +++ b/media/libopus/silk/fixed/x86/prefilter_FIX_sse.c @@ -0,0 +1,160 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "main.h" +#include "celt/x86/x86cpu.h" + +void silk_warped_LPC_analysis_filter_FIX_sse4_1( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) +{ + opus_int n, i; + opus_int32 acc_Q11, tmp1, tmp2; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + if (order == 10) + { + if (0 == lambda_Q16) + { + __m128i coef_Q13_3210, coef_Q13_7654; + __m128i coef_Q13_0123, coef_Q13_4567; + __m128i state_0123, state_4567; + __m128i xmm_product1, xmm_product2; + __m128i xmm_tempa, xmm_tempb; + + register opus_int32 sum; + register opus_int32 state_8, state_9, state_a; + register opus_int64 coef_Q13_8, coef_Q13_9; + + silk_assert( length > 0 ); + + coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); + coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); + + coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + + coef_Q13_8 = (opus_int64) coef_Q13[ 8 ]; + coef_Q13_9 = (opus_int64) coef_Q13[ 9 ]; + + state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) ); + state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) ); + + state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + + state_8 = state[ 8 ]; + state_9 = state[ 9 ]; + state_a = 0; + + for( n = 0; n < length; n++ ) + { + xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */ + xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 ); + + xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); + + xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */ + xmm_product2 = _mm_srli_epi64( xmm_product2, 16 ); + + xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa ); + xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb ); + + xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 ); + xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 ); + + xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 ); + xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); + xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); + + sum = (coef_Q13_8 * state_8) >> 16; + sum += (coef_Q13_9 * state_9) >> 16; + + xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); + sum += _mm_cvtsi128_si32( xmm_tempa); + res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9); + + /* move right */ + state_a = state_9; + state_9 = state_8; + state_8 = _mm_cvtsi128_si32( state_4567 ); + state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 ); + + state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 ); + } + + _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); + _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); + state[ 8 ] = state_8; + state[ 9 ] = state_9; + state[ 10 ] = state_a; + + return; + } + } + + for( n = 0; n < length; n++ ) { + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); + state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); + state[ 1 ] = tmp2; + acc_Q11 = silk_RSHIFT( order, 1 ); + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); + state[ i ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); + state[ i + 1 ] = tmp2; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); + } + state[ order ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); + res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); + } +} diff --git a/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c new file mode 100644 index 0000000000..c1e90564d0 --- /dev/null +++ b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "main.h" + +#include "SigProc_FIX.h" +#include "pitch.h" + +opus_int64 silk_inner_prod16_aligned_64_sse4_1( + const opus_int16 *inVec1, /* I input vector 1 */ + const opus_int16 *inVec2, /* I input vector 2 */ + const opus_int len /* I vector lengths */ +) +{ + opus_int i, dataSize8; + opus_int64 sum; + + __m128i xmm_tempa; + __m128i inVec1_76543210, acc1; + __m128i inVec2_76543210, acc2; + + sum = 0; + dataSize8 = len & ~7; + + acc1 = _mm_setzero_si128(); + acc2 = _mm_setzero_si128(); + + for( i = 0; i < dataSize8; i += 8 ) { + inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) ); + inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) ); + + /* only when all 4 operands are -32768 (0x8000), this results in wrap around */ + inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 ); + + xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 ); + /* equal shift right 8 bytes */ + inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) ); + inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 ); + + acc1 = _mm_add_epi64( acc1, xmm_tempa ); + acc2 = _mm_add_epi64( acc2, inVec1_76543210 ); + } + + acc1 = _mm_add_epi64( acc1, acc2 ); + + /* equal shift right 8 bytes */ + acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) ); + acc1 = _mm_add_epi64( acc1, acc2 ); + + _mm_storel_epi64( (__m128i *)&sum, acc1 ); + + for( ; i < len; i++ ) { + sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); + } + + return sum; +} diff --git a/media/libopus/silk/float/encode_frame_FLP.c b/media/libopus/silk/float/encode_frame_FLP.c index d54e2686e5..2092a4d9e2 100644 --- a/media/libopus/silk/float/encode_frame_FLP.c +++ b/media/libopus/silk/float/encode_frame_FLP.c @@ -47,7 +47,7 @@ void silk_encode_do_VAD_FLP( /****************************/ /* Voice Activity Detection */ /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); + silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); /**************************************************/ /* Convert speech activity into VAD and DTX flags */ diff --git a/media/libopus/silk/float/find_LPC_FLP.c b/media/libopus/silk/float/find_LPC_FLP.c index 61c1ad9554..fcfe1c3681 100644 --- a/media/libopus/silk/float/find_LPC_FLP.c +++ b/media/libopus/silk/float/find_LPC_FLP.c @@ -99,6 +99,6 @@ void silk_find_LPC_FLP( silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder ); } - silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); } diff --git a/media/libopus/silk/float/find_pred_coefs_FLP.c b/media/libopus/silk/float/find_pred_coefs_FLP.c index ea2c6c432a..1af4fe5f1b 100644 --- a/media/libopus/silk/float/find_pred_coefs_FLP.c +++ b/media/libopus/silk/float/find_pred_coefs_FLP.c @@ -67,7 +67,8 @@ void silk_find_pred_coefs_FLP( /* Quantize LTP gain parameters */ silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch ); /* Control LTP scaling */ silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); @@ -90,13 +91,13 @@ void silk_find_pred_coefs_FLP( } silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) ); psEncCtrl->LTPredCodGain = 0.0f; - psEnc->sCmn.sum_log_gain_Q7 = 0; + psEnc->sCmn.sum_log_gain_Q7 = 0; } /* Limit on total predictive coding gain */ if( psEnc->sCmn.first_frame_after_reset ) { minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET; - } else { + } else { minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN; minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality; } diff --git a/media/libopus/silk/float/main_FLP.h b/media/libopus/silk/float/main_FLP.h index fb553b61aa..e5a75972e5 100644 --- a/media/libopus/silk/float/main_FLP.h +++ b/media/libopus/silk/float/main_FLP.h @@ -205,7 +205,8 @@ void silk_quant_LTP_gains_FLP( const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ); /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ diff --git a/media/libopus/silk/float/pitch_analysis_core_FLP.c b/media/libopus/silk/float/pitch_analysis_core_FLP.c index e58f041bd6..d0e637a29d 100644 --- a/media/libopus/silk/float/pitch_analysis_core_FLP.c +++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c @@ -182,8 +182,8 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /* Calculate first vector products before loop */ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; - normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + - silk_energy_FLP( basis_ptr, sf_length_8kHz ) + + normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + + silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f; C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); diff --git a/media/libopus/silk/float/structs_FLP.h b/media/libopus/silk/float/structs_FLP.h index bb529e71a4..14d647ced2 100644 --- a/media/libopus/silk/float/structs_FLP.h +++ b/media/libopus/silk/float/structs_FLP.h @@ -115,6 +115,7 @@ typedef struct { typedef struct { silk_encoder_state_FLP state_Fxx[ ENCODER_NUM_CHANNELS ]; stereo_enc_state sStereo; + opus_int32 nBitsUsedLBRR; opus_int32 nBitsExceeded; opus_int nChannelsAPI; opus_int nChannelsInternal; diff --git a/media/libopus/silk/float/wrappers_FLP.c b/media/libopus/silk/float/wrappers_FLP.c index 350599b20c..6666b8efaa 100644 --- a/media/libopus/silk/float/wrappers_FLP.c +++ b/media/libopus/silk/float/wrappers_FLP.c @@ -161,10 +161,10 @@ void silk_NSQ_wrapper_FLP( /* Call NSQ */ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } else { silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } } @@ -179,7 +179,8 @@ void silk_quant_LTP_gains_FLP( const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ) { opus_int i; @@ -193,7 +194,7 @@ void silk_quant_LTP_gains_FLP( W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); } - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr ); + silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch ); for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); diff --git a/media/libopus/silk/log2lin.c b/media/libopus/silk/log2lin.c index a692e009db..b7c48e4740 100644 --- a/media/libopus/silk/log2lin.c +++ b/media/libopus/silk/log2lin.c @@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE. /* Approximation of 2^() (very close inverse of silk_lin2log()) */ /* Convert input to a linear scale */ -opus_int32 silk_log2lin( +opus_int32 silk_log2lin( const opus_int32 inLog_Q7 /* I input on log scale */ ) { @@ -42,8 +42,8 @@ opus_int32 silk_log2lin( if( inLog_Q7 < 0 ) { return 0; } else if ( inLog_Q7 >= 3967 ) { - return silk_int32_MAX; - } + return silk_int32_MAX; + } out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); frac_Q7 = inLog_Q7 & 0x7F; diff --git a/media/libopus/silk/macros.h b/media/libopus/silk/macros.h index a84e5a5d3e..bc30303466 100644 --- a/media/libopus/silk/macros.h +++ b/media/libopus/silk/macros.h @@ -35,19 +35,42 @@ POSSIBILITY OF SUCH DAMAGE. #include "opus_types.h" #include "opus_defines.h" +#if OPUS_GNUC_PREREQ(3, 0) +#define opus_likely(x) (__builtin_expect(!!(x), 1)) +#define opus_unlikely(x) (__builtin_expect(!!(x), 0)) +#else +#define opus_likely(x) (!!(x)) +#define opus_unlikely(x) (!!(x)) +#endif + +/* Set this if opus_int64 is a native type of the CPU. */ +#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)) + /* This is an OPUS_INLINE header file for general platform. */ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#if OPUS_FAST_INT64 +#define silk_SMULWB(a32, b32) (((a32) * (opus_int64)((opus_int16)(b32))) >> 16) +#else #define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) +#endif /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#if OPUS_FAST_INT64 +#define silk_SMLAWB(a32, b32, c32) ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)) +#else #define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) +#endif /* (a32 * (b32 >> 16)) >> 16 */ #define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) /* a32 + (b32 * (c32 >> 16)) >> 16 */ +#if OPUS_FAST_INT64 +#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)) +#else #define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) +#endif /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ #define silk_SMULBB(a32, b32) ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32))) @@ -65,10 +88,18 @@ POSSIBILITY OF SUCH DAMAGE. #define silk_SMLAL(a64, b32, c32) (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32)))) /* (a32 * b32) >> 16 */ +#if OPUS_FAST_INT64 +#define silk_SMULWW(a32, b32) (((opus_int64)(a32) * (b32)) >> 16) +#else #define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) +#endif /* a32 + ((b32 * c32) >> 16) */ +#if OPUS_FAST_INT64 +#define silk_SMLAWW(a32, b32, c32) ((a32) + (((opus_int64)(b32) * (c32)) >> 16)) +#else #define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) +#endif /* add/subtract with output saturated */ #define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \ @@ -79,17 +110,24 @@ POSSIBILITY OF SUCH DAMAGE. (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) -#include "ecintrin.h" +#if defined(MIPSr1_ASM) +#include "mips/macros_mipsr1.h" +#endif +#include "ecintrin.h" +#ifndef OVERRIDE_silk_CLZ16 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) { return 32 - EC_ILOG(in16<<16|0x8000); } +#endif +#ifndef OVERRIDE_silk_CLZ32 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) { return in32 ? 32 - EC_ILOG(in32) : 32; } +#endif /* Row based */ #define matrix_ptr(Matrix_base_adr, row, column, N) \ diff --git a/media/libopus/silk/main.h b/media/libopus/silk/main.h index 2bdf89784d..2f90d68f7d 100644 --- a/media/libopus/silk/main.h +++ b/media/libopus/silk/main.h @@ -38,6 +38,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "entenc.h" #include "entdec.h" +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) +#include "x86/main_sse.h" +#endif + /* Convert Left/Right stereo signal to adaptive Mid/Side representation */ void silk_stereo_LR_to_MS( stereo_enc_state *state, /* I/O State */ @@ -116,7 +120,7 @@ void silk_encode_signs( /* Decodes signs of excitation */ void silk_decode_signs( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* I/O pulse signal */ + opus_int16 pulses[], /* I/O pulse signal */ opus_int length, /* I length of input */ const opus_int signalType, /* I Signal type */ const opus_int quantOffsetType, /* I Quantization offset type */ @@ -161,7 +165,7 @@ void silk_shell_encoder( /* Shell decoder, operates on one shell code frame of 16 pulses */ void silk_shell_decoder( - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int pulses4 /* I number of pulses per pulse-subframe */ ); @@ -204,15 +208,16 @@ void silk_quant_LTP_gains( opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ + opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ); /* Entropy constrained matrix-weighted VQ, for a single input data vector */ -void silk_VQ_WMat_EC( +void silk_VQ_WMat_EC_c( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ @@ -226,10 +231,18 @@ void silk_VQ_WMat_EC( opus_int L /* I number of vectors in codebook */ ); +#if !defined(OVERRIDE_silk_VQ_WMat_EC) +#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L, arch) \ + ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L)) +#endif + /************************************/ /* Noise shaping quantization (NSQ) */ /************************************/ -void silk_NSQ( + +void silk_NSQ_c( const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ @@ -247,8 +260,15 @@ void silk_NSQ( const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); +#if !defined(OVERRIDE_silk_NSQ) +#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) +#endif + /* Noise shaping using delayed decision */ -void silk_NSQ_del_dec( +void silk_NSQ_del_dec_c( const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ @@ -266,6 +286,13 @@ void silk_NSQ_del_dec( const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); +#if !defined(OVERRIDE_silk_NSQ_del_dec) +#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) +#endif + /************/ /* Silk VAD */ /************/ @@ -275,11 +302,15 @@ opus_int silk_VAD_Init( /* O Return v ); /* Get speech activity level in Q8 */ -opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ +opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ silk_encoder_state *psEncC, /* I/O Encoder state */ const opus_int16 pIn[] /* I PCM input */ ); +#if !defined(OVERRIDE_silk_VAD_GetSA_Q8) +#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_c(psEnC, pIn)) +#endif + /* Low-pass filter with variable cutoff frequency based on */ /* piece-wise linear interpolation between elliptic filters */ /* Start by setting transition_frame_no = 1; */ @@ -373,7 +404,8 @@ opus_int silk_decode_frame( opus_int16 pOut[], /* O Pointer to output speech frame */ opus_int32 *pN, /* O Pointer to size of output frame */ opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding /* I The type of conditional coding to use */ + opus_int condCoding, /* I The type of conditional coding to use */ + int arch /* I Run-time architecture */ ); /* Decode indices from bitstream */ @@ -397,13 +429,14 @@ void silk_decode_core( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I Decoder control */ opus_int16 xq[], /* O Decoded speech */ - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ + const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ + int arch /* I Run-time architecture */ ); /* Decode quantization indices of excitation (Shell coding) */ void silk_decode_pulses( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* O Excitation signal */ + opus_int16 pulses[], /* O Excitation signal */ const opus_int signalType, /* I Sigtype */ const opus_int quantOffsetType, /* I quantOffsetType */ const opus_int frame_length /* I Frame length */ diff --git a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h new file mode 100644 index 0000000000..f6afd923e8 --- /dev/null +++ b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h @@ -0,0 +1,405 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef __NSQ_DEL_DEC_MIPSR1_H__ +#define __NSQ_DEL_DEC_MIPSR1_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main.h" +#include "stack_alloc.h" + +#define OVERRIDE_silk_noise_shape_quantizer_del_dec +static inline void silk_noise_shape_quantizer_del_dec( + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP filter state */ + opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int subfr, /* I Subframe number */ + opus_int shapingLPCOrder, /* I Shaping LPC filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + opus_int warping_Q16, /* I */ + opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ + opus_int decisionDelay /* I */ +) +{ + opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; + opus_int32 Winner_rand_state; + opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; + opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; + opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; + opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; + NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ]; + NSQ_del_dec_struct *psDD; + NSQ_sample_struct *psSS; + opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4; + opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6; + opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13; + opus_int16 a_Q12_14, a_Q12_15; + + opus_int32 cur, prev, next; + + //Intialize b_Q14 variables + b_Q14_0 = b_Q14[ 0 ]; + b_Q14_1 = b_Q14[ 1 ]; + b_Q14_2 = b_Q14[ 2 ]; + b_Q14_3 = b_Q14[ 3 ]; + b_Q14_4 = b_Q14[ 4 ]; + + //Intialize a_Q12 variables + a_Q12_0 = a_Q12[0]; + a_Q12_1 = a_Q12[1]; + a_Q12_2 = a_Q12[2]; + a_Q12_3 = a_Q12[3]; + a_Q12_4 = a_Q12[4]; + a_Q12_5 = a_Q12[5]; + a_Q12_6 = a_Q12[6]; + a_Q12_7 = a_Q12[7]; + a_Q12_8 = a_Q12[8]; + a_Q12_9 = a_Q12[9]; + a_Q12_10 = a_Q12[10]; + a_Q12_11 = a_Q12[11]; + a_Q12_12 = a_Q12[12]; + a_Q12_13 = a_Q12[13]; + a_Q12_14 = a_Q12[14]; + a_Q12_15 = a_Q12[15]; + + long long temp64; + + silk_assert( nStatesDelayedDecision > 0 ); + + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; + pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); + + for( i = 0; i < length; i++ ) { + /* Perform common calculations used in all states */ + + /* Long-term prediction */ + if( signalType == TYPE_VOICED ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 ); + temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 ); + temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 ); + temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 ); + temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 ); + temp64 += 32768; + LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16); + LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ + pred_lag_ptr++; + } else { + LTP_pred_Q14 = 0; + } + + /* Long-term shaping */ + if( lag > 0 ) { + /* Symmetric, packed FIR coefficients */ + n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ + shp_lag_ptr++; + } else { + n_LTP_Q14 = 0; + } + + for( k = 0; k < nStatesDelayedDecision; k++ ) { + /* Delayed decision state */ + psDD = &psDelDec[ k ]; + + /* Sample state */ + psSS = psSampleState[ k ]; + + /* Generate dither */ + psDD->Seed = silk_RAND( psDD->Seed ); + + /* Pointer used in short term prediction and shaping */ + psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; + /* Short-term prediction */ + silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); + temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 ); + if( predictLPCOrder == 16 ) { + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 ); + temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -15 ], a_Q12_15 ); + } + temp64 += 32768; + LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16); + + LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ + + /* Noise shape feedback */ + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ 0 ] = tmp2; + + temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] ); + + prev = psDD->sAR2_Q14[ 1 ]; + + /* Loop over allpass sections */ + for( j = 2; j < shapingLPCOrder; j += 2 ) { + cur = psDD->sAR2_Q14[ j ]; + next = psDD->sAR2_Q14[ j+1 ]; + /* Output of allpass section */ + tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 ); + psDD->sAR2_Q14[ j - 1 ] = tmp1; + temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] ); + temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ j + 0 ] = tmp2; + prev = next; + } + psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; + temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); + temp64 += 32768; + n_AR_Q14 = __builtin_mips_extr_w(temp64, 16); + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ + + n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ + + /* Input minus prediction plus noise feedback */ + /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ + tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ + tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ + + r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ + + /* Flip sign depending on dither */ + if ( psDD->Seed < 0 ) { + r_Q10 = -r_Q10; + } + r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); + + /* Find two quantization level candidates and measure their rate-distortion */ + q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); + q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if( q1_Q0 > 0 ) { + q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == 0 ) { + q1_Q10 = offset_Q10; + q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == -1 ) { + q2_Q10 = offset_Q10; + q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else { /* q1_Q0 < -1 */ + q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); + } + rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); + rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); + rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); + rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); + + if( rd1_Q10 < rd2_Q10 ) { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 0 ].Q_Q10 = q1_Q10; + psSS[ 1 ].Q_Q10 = q2_Q10; + } else { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 0 ].Q_Q10 = q2_Q10; + psSS[ 1 ].Q_Q10 = q1_Q10; + } + + /* Update states for best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 0 ].xq_Q14 = xq_Q14; + + /* Update states for second best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 1 ].xq_Q14 = xq_Q14; + } + + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ + + /* Find winner */ + RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + Winner_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + Winner_ind = k; + } + } + + /* Increase RD values of expired states */ + Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; + for( k = 0; k < nStatesDelayedDecision; k++ ) { + if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { + psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); + psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); + silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); + } + } + + /* Find worst in first set and best in second set */ + RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; + RDmax_ind = 0; + RDmin_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + /* find worst in first set */ + if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { + RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + RDmax_ind = k; + } + /* find best in second set */ + if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; + RDmin_ind = k; + } + } + + /* Replace a state if best from second set outperforms worst in first set */ + if( RDmin_Q10 < RDmax_Q10 ) { + silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, + ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); + silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); + } + + /* Write samples from winner to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + if( subfr > 0 || i >= decisionDelay ) { + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; + sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; + } + NSQ->sLTP_shp_buf_idx++; + NSQ->sLTP_buf_idx++; + + /* Update states */ + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + psSS = &psSampleState[ k ][ 0 ]; + psDD->LF_AR_Q14 = psSS->LF_AR_Q14; + psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; + psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; + psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; + psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); + psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; + psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); + psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; + psDD->RD_Q10 = psSS->RD_Q10; + } + delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; + } + /* Update LPC states */ + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + } +} + +#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */ diff --git a/media/libopus/silk/mips/macros_mipsr1.h b/media/libopus/silk/mips/macros_mipsr1.h new file mode 100644 index 0000000000..12ed981a6e --- /dev/null +++ b/media/libopus/silk/mips/macros_mipsr1.h @@ -0,0 +1,92 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + + +#ifndef __SILK_MACROS_MIPSR1_H__ +#define __SILK_MACROS_MIPSR1_H__ + +#define mips_clz(x) __builtin_clz(x) + +#undef silk_SMULWB +static inline int silk_SMULWB(int a, int b) +{ + long long ac; + int c; + + ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b); + c = __builtin_mips_extr_w(ac, 16); + + return c; +} + +#undef silk_SMLAWB +#define silk_SMLAWB(a32, b32, c32) ((a32) + silk_SMULWB(b32, c32)) + +#undef silk_SMULWW +static inline int silk_SMULWW(int a, int b) +{ + long long ac; + int c; + + ac = __builtin_mips_mult(a, b); + c = __builtin_mips_extr_w(ac, 16); + + return c; +} + +#undef silk_SMLAWW +static inline int silk_SMLAWW(int a, int b, int c) +{ + long long ac; + int res; + + ac = __builtin_mips_mult(b, c); + res = __builtin_mips_extr_w(ac, 16); + res += a; + + return res; +} + +#define OVERRIDE_silk_CLZ16 +static inline opus_int32 silk_CLZ16(opus_int16 in16) +{ + int re32; + opus_int32 in32 = (opus_int32 )in16; + re32 = mips_clz(in32); + re32-=16; + return re32; +} + +#define OVERRIDE_silk_CLZ32 +static inline opus_int32 silk_CLZ32(opus_int32 in32) +{ + int re32; + re32 = mips_clz(in32); + return re32; +} + +#endif /* __SILK_MACROS_MIPSR1_H__ */ diff --git a/media/libopus/silk/mips/sigproc_fix_mipsr1.h b/media/libopus/silk/mips/sigproc_fix_mipsr1.h new file mode 100644 index 0000000000..3b0a695365 --- /dev/null +++ b/media/libopus/silk/mips/sigproc_fix_mipsr1.h @@ -0,0 +1,65 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_SIGPROC_FIX_MIPSR1_H +#define SILK_SIGPROC_FIX_MIPSR1_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#undef silk_SAT16 +static inline short int silk_SAT16(int a) +{ + int c; + c = __builtin_mips_shll_s_w(a, 16); + c = c>>16; + + return c; +} + +#undef silk_LSHIFT_SAT32 +static inline int silk_LSHIFT_SAT32(int a, int shift) +{ + int r; + + r = __builtin_mips_shll_s_w(a, shift); + + return r; +} + +#undef silk_RSHIFT_ROUND +static inline int silk_RSHIFT_ROUND(int a, int shift) +{ + int r; + + r = __builtin_mips_shra_r_w(a, shift); + return r; +} + +#endif /* SILK_SIGPROC_FIX_MIPSR1_H */ diff --git a/media/libopus/silk/quant_LTP_gains.c b/media/libopus/silk/quant_LTP_gains.c index fd0870da19..513a8c4468 100644 --- a/media/libopus/silk/quant_LTP_gains.c +++ b/media/libopus/silk/quant_LTP_gains.c @@ -36,11 +36,12 @@ void silk_quant_LTP_gains( opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ + opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ) { opus_int j, k, cbk_size; @@ -51,7 +52,7 @@ void silk_quant_LTP_gains( const opus_int16 *b_Q14_ptr; const opus_int32 *W_Q18_ptr; opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14; - opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; + opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; /***************************************************/ /* iterate over different codebooks with different */ @@ -74,23 +75,24 @@ void silk_quant_LTP_gains( b_Q14_ptr = B_Q14; rate_dist_Q14 = 0; - sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; + sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; for( j = 0; j < nb_subfr; j++ ) { - max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) - + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; + max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) + + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; silk_VQ_WMat_EC( &temp_idx[ j ], /* O index of best codebook vector */ &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */ - &gain_Q7, /* O sum of absolute LTP coefficients */ + &gain_Q7, /* O sum of absolute LTP coefficients */ b_Q14_ptr, /* I input vector to be quantized */ W_Q18_ptr, /* I weighting matrix */ cbk_ptr_Q7, /* I codebook */ cbk_gain_ptr_Q7, /* I codebook effective gains */ cl_ptr_Q5, /* I code length for each codebook vector */ mu_Q9, /* I tradeoff between weighted error and rate */ - max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - cbk_size /* I number of vectors in codebook */ + max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + cbk_size, /* I number of vectors in codebook */ + arch /* I Run-time architecture */ ); rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr ); @@ -108,7 +110,7 @@ void silk_quant_LTP_gains( min_rate_dist_Q14 = rate_dist_Q14; *periodicity_index = (opus_int8)k; silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) ); - best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; + best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; } /* Break early in low-complexity mode if rate distortion is below threshold */ @@ -123,6 +125,5 @@ void silk_quant_LTP_gains( B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); } } - *sum_log_gain_Q7 = best_sum_log_gain_Q7; + *sum_log_gain_Q7 = best_sum_log_gain_Q7; } - diff --git a/media/libopus/silk/resampler_rom.c b/media/libopus/silk/resampler_rom.c index 2d502706f9..5e6b04476a 100644 --- a/media/libopus/silk/resampler_rom.c +++ b/media/libopus/silk/resampler_rom.c @@ -41,36 +41,36 @@ POSSIBILITY OF SUCH DAMAGE. /* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */ silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -20694, -13867, - -49, 64, 17, -157, 353, -496, 163, 11047, 22205, - -39, 6, 91, -170, 186, 23, -896, 6336, 19928, - -19, -36, 102, -89, -24, 328, -951, 2568, 15909, + -20694, -13867, + -49, 64, 17, -157, 353, -496, 163, 11047, 22205, + -39, 6, 91, -170, 186, 23, -896, 6336, 19928, + -19, -36, 102, -89, -24, 328, -951, 2568, 15909, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -14457, -14019, - 64, 128, -122, 36, 310, -768, 584, 9267, 17733, - 12, 128, 18, -142, 288, -117, -865, 4123, 14459, + -14457, -14019, + 64, 128, -122, 36, 310, -768, 584, 9267, 17733, + 12, 128, 18, -142, 288, -117, -865, 4123, 14459, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = { - 616, -14323, - -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024, + 616, -14323, + -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 16102, -15162, - -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271, + 16102, -15162, + -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 22500, -15099, - 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464, + 22500, -15099, + 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 27540, -15257, - 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455, + 27540, -15257, + 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455, }; silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = { @@ -81,16 +81,16 @@ silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = { /* Table with interplation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (46 Words) */ silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = { - { 189, -600, 617, 30567 }, - { 117, -159, -1070, 29704 }, - { 52, 221, -2392, 28276 }, - { -4, 529, -3350, 26341 }, - { -48, 758, -3956, 23973 }, - { -80, 905, -4235, 21254 }, - { -99, 972, -4222, 18278 }, - { -107, 967, -3957, 15143 }, - { -103, 896, -3487, 11950 }, - { -91, 773, -2865, 8798 }, - { -71, 611, -2143, 5784 }, - { -46, 425, -1375, 2996 }, + { 189, -600, 617, 30567 }, + { 117, -159, -1070, 29704 }, + { 52, 221, -2392, 28276 }, + { -4, 529, -3350, 26341 }, + { -48, 758, -3956, 23973 }, + { -80, 905, -4235, 21254 }, + { -99, 972, -4222, 18278 }, + { -107, 967, -3957, 15143 }, + { -103, 896, -3487, 11950 }, + { -91, 773, -2865, 8798 }, + { -71, 611, -2143, 5784 }, + { -46, 425, -1375, 2996 }, }; diff --git a/media/libopus/silk/shell_coder.c b/media/libopus/silk/shell_coder.c index 796f57d6c2..4af341474b 100644 --- a/media/libopus/silk/shell_coder.c +++ b/media/libopus/silk/shell_coder.c @@ -58,8 +58,8 @@ static OPUS_INLINE void encode_split( } static OPUS_INLINE void decode_split( - opus_int *p_child1, /* O pulse amplitude of first child subframe */ - opus_int *p_child2, /* O pulse amplitude of second child subframe */ + opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ + opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int p, /* I pulse amplitude of current subframe */ const opus_uint8 *shell_table /* I table of shell cdfs */ @@ -117,12 +117,12 @@ void silk_shell_encoder( /* Shell decoder, operates on one shell code frame of 16 pulses */ void silk_shell_decoder( - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int pulses4 /* I number of pulses per pulse-subframe */ ) { - opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; + opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; /* this function operates on one shell code frame of 16 pulses */ silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); diff --git a/media/libopus/silk/structs.h b/media/libopus/silk/structs.h index 1826b36a80..827829dc6f 100644 --- a/media/libopus/silk/structs.h +++ b/media/libopus/silk/structs.h @@ -171,7 +171,7 @@ typedef struct { opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */ opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */ opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ - opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ + opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */ diff --git a/media/libopus/silk/sum_sqr_shift.c b/media/libopus/silk/sum_sqr_shift.c index 12514c9917..129df191d8 100644 --- a/media/libopus/silk/sum_sqr_shift.c +++ b/media/libopus/silk/sum_sqr_shift.c @@ -53,6 +53,7 @@ void silk_sum_sqr_shift( /* Scale down */ nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); shft = 2; + i+=2; break; } } diff --git a/media/libopus/silk/tables.h b/media/libopus/silk/tables.h index a91431e854..7fea6fda39 100644 --- a/media/libopus/silk/tables.h +++ b/media/libopus/silk/tables.h @@ -47,8 +47,8 @@ extern const opus_uint8 silk_pitch_contour_NB_iCDF[ 11 ]; extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */ extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ]; /* 180 */ -extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ]; /* 162 */ +extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ SILK_MAX_PULSES + 2 ]; /* 180 */ +extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ SILK_MAX_PULSES + 2 ]; /* 162 */ extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ extern const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ @@ -59,7 +59,7 @@ extern const opus_uint8 silk_shell_code_table0[ 152 ]; extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */ extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */ extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table_offsets[ MAX_PULSES + 1 ]; /* 17 */ +extern const opus_uint8 silk_shell_code_table_offsets[ SILK_MAX_PULSES + 1 ]; /* 17 */ extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */ diff --git a/media/libopus/silk/tuning_parameters.h b/media/libopus/silk/tuning_parameters.h index e1057bbaae..5b8f404235 100644 --- a/media/libopus/silk/tuning_parameters.h +++ b/media/libopus/silk/tuning_parameters.h @@ -64,7 +64,7 @@ extern "C" #define MU_LTP_QUANT_WB 0.02f /* Max cumulative LTP gain */ -#define MAX_SUM_LOG_GAIN_DB 250.0f +#define MAX_SUM_LOG_GAIN_DB 250.0f /***********************/ /* High pass filtering */ diff --git a/media/libopus/silk/x86/NSQ_del_dec_sse.c b/media/libopus/silk/x86/NSQ_del_dec_sse.c new file mode 100644 index 0000000000..21d4a8bc1e --- /dev/null +++ b/media/libopus/silk/x86/NSQ_del_dec_sse.c @@ -0,0 +1,857 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "main.h" +#include "celt/x86/x86cpu.h" + +#include "stack_alloc.h" + +typedef struct { + opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; + opus_int32 RandState[ DECISION_DELAY ]; + opus_int32 Q_Q10[ DECISION_DELAY ]; + opus_int32 Xq_Q14[ DECISION_DELAY ]; + opus_int32 Pred_Q15[ DECISION_DELAY ]; + opus_int32 Shape_Q14[ DECISION_DELAY ]; + opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 LF_AR_Q14; + opus_int32 Seed; + opus_int32 SeedInit; + opus_int32 RD_Q10; +} NSQ_del_dec_struct; + +typedef struct { + opus_int32 Q_Q10; + opus_int32 RD_Q10; + opus_int32 xq_Q14; + opus_int32 LF_AR_Q14; + opus_int32 sLTP_shp_Q14; + opus_int32 LPC_exc_Q14; +} NSQ_sample_struct; + +typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; + +static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + const opus_int32 x_Q3[], /* I Input in Q3 */ + opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ + const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I Subframe number */ + opus_int nStatesDelayedDecision, /* I Number of del dec states */ + const opus_int LTP_scale_Q14, /* I LTP state scaling */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type, /* I Signal type */ + const opus_int decisionDelay /* I Decision delay */ +); + +/******************************************/ +/* Noise shape quantizer for one subframe */ +/******************************************/ +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP filter state */ + opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int subfr, /* I Subframe number */ + opus_int shapingLPCOrder, /* I Shaping LPC filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + opus_int warping_Q16, /* I */ + opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ + opus_int decisionDelay /* I */ +); + +void silk_NSQ_del_dec_sse4_1( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +) +{ + opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; + opus_int last_smple_idx, smpl_buf_idx, decisionDelay; + const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; + opus_int16 *pxq; + VARDECL( opus_int32, sLTP_Q15 ); + VARDECL( opus_int16, sLTP ); + opus_int32 HarmShapeFIRPacked_Q14; + opus_int offset_Q10; + opus_int32 RDmin_Q10, Gain_Q10; + VARDECL( opus_int32, x_sc_Q10 ); + VARDECL( opus_int32, delayedGain_Q10 ); + VARDECL( NSQ_del_dec_struct, psDelDec ); + NSQ_del_dec_struct *psDD; + SAVE_STACK; + + /* Set unvoiced lag to the previous one, overwrite later for voiced */ + lag = NSQ->lagPrev; + + silk_assert( NSQ->prev_gain_Q16 != 0 ); + + /* Initialize delayed decision states */ + ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); + silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); + for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + psDD->Seed = ( k + psIndices->Seed ) & 3; + psDD->SeedInit = psDD->Seed; + psDD->RD_Q10 = 0; + psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; + psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; + silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); + } + + offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; + smpl_buf_idx = 0; /* index of oldest samples */ + + decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); + + /* For voiced frames limit the decision delay to lower than the pitch lag */ + if( psIndices->signalType == TYPE_VOICED ) { + for( k = 0; k < psEncC->nb_subfr; k++ ) { + decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); + } + } else { + if( lag > 0 ) { + decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); + } + } + + if( psIndices->NLSFInterpCoef_Q2 == 4 ) { + LSF_interpolation_flag = 0; + } else { + LSF_interpolation_flag = 1; + } + + ALLOC( sLTP_Q15, + psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); + ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); + ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); + /* Set up pointers to start of sub frame */ + pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; + NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + subfr = 0; + for( k = 0; k < psEncC->nb_subfr; k++ ) { + A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; + B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; + AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Noise shape parameters */ + silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); + HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); + HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); + + NSQ->rewhite_flag = 0; + if( psIndices->signalType == TYPE_VOICED ) { + /* Voiced */ + lag = pitchL[ k ]; + + /* Re-whitening */ + if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { + if( k == 2 ) { + /* RESET DELAYED DECISIONS */ + /* Find winner */ + RDmin_Q10 = psDelDec[ 0 ].RD_Q10; + Winner_ind = 0; + for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { + if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psDelDec[ i ].RD_Q10; + Winner_ind = i; + } + } + for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { + if( i != Winner_ind ) { + psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); + silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); + } + } + + /* Copy final part of signals from winner state to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + last_smple_idx = smpl_buf_idx + decisionDelay; + for( i = 0; i < decisionDelay; i++ ) { + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; + } + + subfr = 0; + } + + /* Rewhiten with new A coefs */ + start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; + silk_assert( start_idx > 0 ); + + silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], + A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); + + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + NSQ->rewhite_flag = 1; + } + } + + silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, + psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); + + silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, + delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], + Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, + psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); + + x_Q3 += psEncC->subfr_length; + pulses += psEncC->subfr_length; + pxq += psEncC->subfr_length; + } + + /* Find winner */ + RDmin_Q10 = psDelDec[ 0 ].RD_Q10; + Winner_ind = 0; + for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { + if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psDelDec[ k ].RD_Q10; + Winner_ind = k; + } + } + + /* Copy final part of signals from winner state to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + psIndices->Seed = psDD->SeedInit; + last_smple_idx = smpl_buf_idx + decisionDelay; + Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); + for( i = 0; i < decisionDelay; i++ ) { + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; + } + silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); + + /* Update states */ + NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; + NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; + + /* Save quantized speech signal */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ + silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); + silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + RESTORE_STACK; +} + +/******************************************/ +/* Noise shape quantizer for one subframe */ +/******************************************/ +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP filter state */ + opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int subfr, /* I Subframe number */ + opus_int shapingLPCOrder, /* I Shaping LPC filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + opus_int warping_Q16, /* I */ + opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ + opus_int decisionDelay /* I */ +) +{ + opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; + opus_int32 Winner_rand_state; + opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; + opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; + opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; + opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; + VARDECL( NSQ_sample_pair, psSampleState ); + NSQ_del_dec_struct *psDD; + NSQ_sample_struct *psSS; + + __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; + __m128i b_Q12_0123, b_sr_Q12_0123; + SAVE_STACK; + + silk_assert( nStatesDelayedDecision > 0 ); + ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); + + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; + pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); + + a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); + a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); + + if( opus_likely( predictLPCOrder == 16 ) ) { + a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); + a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); + } + + if( signalType == TYPE_VOICED ){ + b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); + b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + } + for( i = 0; i < length; i++ ) { + /* Perform common calculations used in all states */ + + /* Long-term prediction */ + if( signalType == TYPE_VOICED ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q14 = 2; + { + __m128i tmpa, tmpb, pred_lag_ptr_tmp; + pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); + pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B ); + tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 ); + tmpa = _mm_srli_si128( tmpa, 2 ); + + pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ + pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 ); + pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); + pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); + + tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */ + pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); + LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); + + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); + LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ + pred_lag_ptr++; + } + } else { + LTP_pred_Q14 = 0; + } + + /* Long-term shaping */ + if( lag > 0 ) { + /* Symmetric, packed FIR coefficients */ + n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ + shp_lag_ptr++; + } else { + n_LTP_Q14 = 0; + } + { + __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; + + for( k = 0; k < nStatesDelayedDecision; k++ ) { + /* Delayed decision state */ + psDD = &psDelDec[ k ]; + + /* Sample state */ + psSS = psSampleState[ k ]; + + /* Generate dither */ + psDD->Seed = silk_RAND( psDD->Seed ); + + /* Pointer used in short term prediction and shaping */ + psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; + /* Short-term prediction */ + silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); + + tmpb = _mm_setzero_si128(); + + /* step 1 */ + psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */ + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */ + tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ + + tmpa = _mm_srli_epi64( tmpa, 16 ); + tmpb = _mm_add_epi32( tmpb, tmpa ); + + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */ + psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); + tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); + + /* step 2 */ + psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) ); + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); + tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); + tmpa = _mm_srli_epi64( tmpa, 16 ); + tmpb = _mm_add_epi32( tmpb, tmpa ); + + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); + psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); + tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); + + if ( opus_likely( predictLPCOrder == 16 ) ) + { + /* step 3 */ + psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) ); + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); + tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB ); + tmpa = _mm_srli_epi64( tmpa, 16 ); + tmpb = _mm_add_epi32( tmpb, tmpa ); + + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */ + psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); + psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); + tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); + + /* setp 4 */ + psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); + tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); + tmpa = _mm_srli_epi64( tmpa, 16 ); + tmpb = _mm_add_epi32( tmpb, tmpa ); + + psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ + psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); + psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); + tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); + + /* add at last */ + /* equal shift right 8 bytes*/ + tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); + tmpb = _mm_add_epi32( tmpb, tmpa ); + LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); + } + else + { + /* add at last */ + tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/ + tmpb = _mm_add_epi32( tmpb, tmpa ); + LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); + + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); + } + + LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ + + /* Noise shape feedback */ + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ 0 ] = tmp2; + n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); + /* Loop over allpass sections */ + for( j = 2; j < shapingLPCOrder; j += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); + psDD->sAR2_Q14[ j - 1 ] = tmp1; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ j + 0 ] = tmp2; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); + } + psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); + + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ + + n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ + + /* Input minus prediction plus noise feedback */ + /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ + tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ + tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ + + r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ + + /* Flip sign depending on dither */ + if ( psDD->Seed < 0 ) { + r_Q10 = -r_Q10; + } + r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); + + /* Find two quantization level candidates and measure their rate-distortion */ + q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); + q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if( q1_Q0 > 0 ) { + q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == 0 ) { + q1_Q10 = offset_Q10; + q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == -1 ) { + q2_Q10 = offset_Q10; + q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else { /* q1_Q0 < -1 */ + q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); + } + rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); + rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); + rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); + rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); + + if( rd1_Q10 < rd2_Q10 ) { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 0 ].Q_Q10 = q1_Q10; + psSS[ 1 ].Q_Q10 = q2_Q10; + } else { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 0 ].Q_Q10 = q2_Q10; + psSS[ 1 ].Q_Q10 = q1_Q10; + } + + /* Update states for best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 0 ].xq_Q14 = xq_Q14; + + /* Update states for second best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 1 ].xq_Q14 = xq_Q14; + } + } + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ + + /* Find winner */ + RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + Winner_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + Winner_ind = k; + } + } + + /* Increase RD values of expired states */ + Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; + for( k = 0; k < nStatesDelayedDecision; k++ ) { + if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { + psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); + psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); + silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); + } + } + + /* Find worst in first set and best in second set */ + RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; + RDmax_ind = 0; + RDmin_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + /* find worst in first set */ + if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { + RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + RDmax_ind = k; + } + /* find best in second set */ + if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; + RDmin_ind = k; + } + } + + /* Replace a state if best from second set outperforms worst in first set */ + if( RDmin_Q10 < RDmax_Q10 ) { + silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, + ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); + silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); + } + + /* Write samples from winner to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + if( subfr > 0 || i >= decisionDelay ) { + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; + sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; + } + NSQ->sLTP_shp_buf_idx++; + NSQ->sLTP_buf_idx++; + + /* Update states */ + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + psSS = &psSampleState[ k ][ 0 ]; + psDD->LF_AR_Q14 = psSS->LF_AR_Q14; + psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; + psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; + psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; + psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); + psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; + psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); + psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; + psDD->RD_Q10 = psSS->RD_Q10; + } + delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; + } + /* Update LPC states */ + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + } + RESTORE_STACK; +} + +static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + const opus_int32 x_Q3[], /* I Input in Q3 */ + opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ + const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I Subframe number */ + opus_int nStatesDelayedDecision, /* I Number of del dec states */ + const opus_int LTP_scale_Q14, /* I LTP state scaling */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type, /* I Signal type */ + const opus_int decisionDelay /* I Decision delay */ +) +{ + opus_int i, k, lag; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + NSQ_del_dec_struct *psDD; + __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + + lag = pitchL[ subfr ]; + inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); + + silk_assert( inv_gain_Q31 != 0 ); + + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + + /* Scale input */ + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + + /* prepare inv_gain_Q23 in packed 4 32-bits */ + xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + + for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { + xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + /* equal shift right 4 bytes*/ + xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); + xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + + xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); + xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + + xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); + } + + for( ; i < psEncC->subfr_length; i++ ) { + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ + if( NSQ->rewhite_flag ) { + if( subfr == 0 ) { + /* Do LTP downscaling */ + inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); + } + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + silk_assert( i < MAX_FRAME_LENGTH ); + sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); + } + } + + /* Adjust for changing gain */ + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + /* Scale long-term shaping state */ + { + __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; + + /* prepare gain_adj_Q16 in packed 4 32-bits */ + xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); + + for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) + { + xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); + /* equal shift right 4 bytes*/ + xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); + xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); + xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); + + _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); + } + + for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { + NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); + } + + /* Scale long-term prediction state */ + if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { + sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); + } + } + + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + + /* Scale scalar states */ + psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); + + /* Scale short-term prediction and shaping states */ + for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { + psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); + } + for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { + psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); + } + for( i = 0; i < DECISION_DELAY; i++ ) { + psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); + psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); + } + } + } + } +} diff --git a/media/libopus/silk/x86/NSQ_sse.c b/media/libopus/silk/x86/NSQ_sse.c new file mode 100644 index 0000000000..72f34fd6fc --- /dev/null +++ b/media/libopus/silk/x86/NSQ_sse.c @@ -0,0 +1,720 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "main.h" +#include "celt/x86/x86cpu.h" +#include "stack_alloc.h" + +static OPUS_INLINE void silk_nsq_scale_states_sse4_1( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int32 x_Q3[], /* I input in Q3 */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ +); + +static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( + silk_nsq_state *NSQ, /* I/O NSQ state */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_sc_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP state */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int32 table[][4] /* I */ +); + +void silk_NSQ_sse4_1( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +) +{ + opus_int k, lag, start_idx, LSF_interpolation_flag; + const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; + opus_int16 *pxq; + VARDECL( opus_int32, sLTP_Q15 ); + VARDECL( opus_int16, sLTP ); + opus_int32 HarmShapeFIRPacked_Q14; + opus_int offset_Q10; + VARDECL( opus_int32, x_sc_Q10 ); + + opus_int32 table[ 64 ][ 4 ]; + opus_int32 tmp1; + opus_int32 q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; + + SAVE_STACK; + + NSQ->rand_seed = psIndices->Seed; + + /* Set unvoiced lag to the previous one, overwrite later for voiced */ + lag = NSQ->lagPrev; + + silk_assert( NSQ->prev_gain_Q16 != 0 ); + + offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; + + /* 0 */ + q1_Q10 = offset_Q10; + q2_Q10 = offset_Q10 + ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q20 = q1_Q10 * Lambda_Q10; + rd2_Q20 = q2_Q10 * Lambda_Q10; + + table[ 32 ][ 0 ] = q1_Q10; + table[ 32 ][ 1 ] = q2_Q10; + table[ 32 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); + table[ 32 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); + + /* -1 */ + q1_Q10 = offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); + q2_Q10 = offset_Q10; + rd1_Q20 = - q1_Q10 * Lambda_Q10; + rd2_Q20 = q2_Q10 * Lambda_Q10; + + table[ 31 ][ 0 ] = q1_Q10; + table[ 31 ][ 1 ] = q2_Q10; + table[ 31 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); + table[ 31 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); + + /* > 0 */ + for (k = 1; k <= 31; k++) + { + tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); + + q1_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10; + q2_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10 + 1024; + rd1_Q20 = q1_Q10 * Lambda_Q10; + rd2_Q20 = q2_Q10 * Lambda_Q10; + + table[ 32 + k ][ 0 ] = q1_Q10; + table[ 32 + k ][ 1 ] = q2_Q10; + table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); + table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); + } + + /* < -1 */ + for (k = -32; k <= -2; k++) + { + tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); + + q1_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10; + q2_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10 + 1024; + rd1_Q20 = - q1_Q10 * Lambda_Q10; + rd2_Q20 = - q2_Q10 * Lambda_Q10; + + table[ 32 + k ][ 0 ] = q1_Q10; + table[ 32 + k ][ 1 ] = q2_Q10; + table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); + table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); + } + + if( psIndices->NLSFInterpCoef_Q2 == 4 ) { + LSF_interpolation_flag = 0; + } else { + LSF_interpolation_flag = 1; + } + + ALLOC( sLTP_Q15, + psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); + ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); + /* Set up pointers to start of sub frame */ + NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; + for( k = 0; k < psEncC->nb_subfr; k++ ) { + A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; + B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; + AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Noise shape parameters */ + silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); + HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); + HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); + + NSQ->rewhite_flag = 0; + if( psIndices->signalType == TYPE_VOICED ) { + /* Voiced */ + lag = pitchL[ k ]; + + /* Re-whitening */ + if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { + /* Rewhiten with new A coefs */ + start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; + silk_assert( start_idx > 0 ); + + silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], + A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); + + NSQ->rewhite_flag = 1; + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + } + } + + silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); + + if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) ) + { + silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, + AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], + offset_Q10, psEncC->subfr_length, &(table[32]) ); + } + else + { + silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, + AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, + offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder ); + } + + x_Q3 += psEncC->subfr_length; + pulses += psEncC->subfr_length; + pxq += psEncC->subfr_length; + } + + /* Update lagPrev for next frame */ + NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; + + /* Save quantized speech and noise shaping signals */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ + silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); + silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + RESTORE_STACK; +} + +/***********************************/ +/* silk_noise_shape_quantizer_10_16 */ +/***********************************/ +static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( + silk_nsq_state *NSQ, /* I/O NSQ state */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_sc_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP state */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int32 table[][4] /* I */ +) +{ + opus_int i; + opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; + opus_int32 n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10; + opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; + opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; + + __m128i xmm_tempa, xmm_tempb; + + __m128i xmm_one; + + __m128i psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF; + __m128i psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF; + __m128i a_Q12_01234567, a_Q12_89ABCDEF; + + __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210; + __m128i AR_shp_Q13_76543210; + + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; + pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); + + /* Set up short term AR state */ + psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; + + sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14; + xq_Q14 = psLPC_Q14[ 0 ]; + LTP_pred_Q13 = 0; + + /* load a_Q12 */ + xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 ); + + /* load a_Q12[0] - a_Q12[7] */ + a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) ); + /* load a_Q12[ 8 ] - a_Q12[ 15 ] */ + a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) ); + + a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one ); + a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one ); + + /* load AR_shp_Q13 */ + AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) ); + + /* load psLPC_Q14 */ + xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 ); + + xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) ); + xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) ); + + xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); + xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); + + psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); + psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); + + xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) ); + xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) ); + + xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); + xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); + + psLPC_Q14_hi_01234567 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); + psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); + + /* load sAR2_Q14 */ + xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) ); + xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) ); + + xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); + xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); + + sAR2_Q14_hi_76543210 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); + sAR2_Q14_lo_76543210 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); + + /* prepare 1 in 8 * 16bit */ + xmm_one = _mm_set1_epi16(1); + + for( i = 0; i < length; i++ ) + { + /* Short-term prediction */ + __m128i xmm_hi_07, xmm_hi_8F, xmm_lo_07, xmm_lo_8F; + + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LPC_pred_Q10 = 8; /* silk_RSHIFT( predictLPCOrder, 1 ); */ + + /* shift psLPC_Q14 */ + psLPC_Q14_hi_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF, 2 ); + psLPC_Q14_lo_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF, 2 ); + + psLPC_Q14_hi_01234567 = _mm_srli_si128( psLPC_Q14_hi_01234567, 2 ); + psLPC_Q14_lo_01234567 = _mm_srli_si128( psLPC_Q14_lo_01234567, 2 ); + + psLPC_Q14_hi_01234567 = _mm_insert_epi16( psLPC_Q14_hi_01234567, (xq_Q14 >> 16), 7 ); + psLPC_Q14_lo_01234567 = _mm_insert_epi16( psLPC_Q14_lo_01234567, (xq_Q14), 7 ); + + /* high part, use pmaddwd, results in 4 32-bit */ + xmm_hi_07 = _mm_madd_epi16( psLPC_Q14_hi_01234567, a_Q12_01234567 ); + xmm_hi_8F = _mm_madd_epi16( psLPC_Q14_hi_89ABCDEF, a_Q12_89ABCDEF ); + + /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed, _mm_srai_epi16(psLPC_Q14_lo_01234567, 15) */ + xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_01234567 ); + xmm_tempb = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_89ABCDEF ); + + xmm_tempa = _mm_and_si128( xmm_tempa, a_Q12_01234567 ); + xmm_tempb = _mm_and_si128( xmm_tempb, a_Q12_89ABCDEF ); + + xmm_lo_07 = _mm_mulhi_epi16( psLPC_Q14_lo_01234567, a_Q12_01234567 ); + xmm_lo_8F = _mm_mulhi_epi16( psLPC_Q14_lo_89ABCDEF, a_Q12_89ABCDEF ); + + xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); + xmm_lo_8F = _mm_add_epi16( xmm_lo_8F, xmm_tempb ); + + xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); + xmm_lo_8F = _mm_madd_epi16( xmm_lo_8F, xmm_one ); + + /* accumulate */ + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_hi_8F ); + xmm_lo_07 = _mm_add_epi32( xmm_lo_07, xmm_lo_8F ); + + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); + + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); + + LPC_pred_Q10 += _mm_cvtsi128_si32( xmm_hi_07 ); + + /* Long-term prediction */ + if ( opus_likely( signalType == TYPE_VOICED ) ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q13 = 2; + { + __m128i b_Q14_3210, b_Q14_0123, pred_lag_ptr_0123; + + b_Q14_3210 = OP_CVTEPI16_EPI32_M64( b_Q14 ); + b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B ); + + /* loaded: [0] [-1] [-2] [-3] */ + pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); + /* shuffle to [-3] [-2] [-1] [0] and to new xmm */ + xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B ); + /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */ + xmm_tempa = _mm_mul_epi32( xmm_tempa, b_Q14_3210 ); + /* right shift 2 bytes (16 bits), zero extended */ + xmm_tempa = _mm_srli_si128( xmm_tempa, 2 ); + + /* a[1] * b[-1], a[3] * b[-3] */ + pred_lag_ptr_0123 = _mm_mul_epi32( pred_lag_ptr_0123, b_Q14_0123 ); + pred_lag_ptr_0123 = _mm_srli_si128( pred_lag_ptr_0123, 2 ); + + pred_lag_ptr_0123 = _mm_add_epi32( pred_lag_ptr_0123, xmm_tempa ); + /* equal shift right 8 bytes*/ + xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, _MM_SHUFFLE( 0, 0, 3, 2 ) ); + xmm_tempa = _mm_add_epi32( xmm_tempa, pred_lag_ptr_0123 ); + + LTP_pred_Q13 += _mm_cvtsi128_si32( xmm_tempa ); + + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); + pred_lag_ptr++; + } + } + + /* Noise shape feedback */ + NSQ->sAR2_Q14[ 9 ] = NSQ->sAR2_Q14[ 8 ]; + NSQ->sAR2_Q14[ 8 ] = _mm_cvtsi128_si32( _mm_srli_si128(_mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ), 12 ) ); + + sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 ); + sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 ); + + sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 ); + sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14), 0 ); + + /* high part, use pmaddwd, results in 4 32-bit */ + xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 ); + + /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed,_mm_srai_epi16(sAR2_Q14_lo_76543210, 15) */ + xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), sAR2_Q14_lo_76543210 ); + xmm_tempa = _mm_and_si128( xmm_tempa, AR_shp_Q13_76543210 ); + + xmm_lo_07 = _mm_mulhi_epi16( sAR2_Q14_lo_76543210, AR_shp_Q13_76543210 ); + xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); + + xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); + + /* accumulate */ + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); + + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); + xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); + + n_AR_Q12 = 5 + _mm_cvtsi128_si32( xmm_hi_07 ); + + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 8 ], AR_shp_Q13[ 8 ] ); + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 9 ], AR_shp_Q13[ 9 ] ); + + n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */ + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, sLF_AR_shp_Q14, Tilt_Q14 ); + + n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); + n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 ); + + silk_assert( lag > 0 || signalType != TYPE_VOICED ); + + /* Combine prediction and noise shaping signals */ + tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ + tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ + if( lag > 0 ) { + /* Symmetric, packed FIR coefficients */ + n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); + shp_lag_ptr++; + + tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ + tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ + tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ + } else { + tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ + } + + r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ + + /* Generate dither */ + NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); + + /* Flip sign depending on dither */ + tmp2 = -r_Q10; + if ( NSQ->rand_seed < 0 ) r_Q10 = tmp2; + + r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); + + /* Find two quantization level candidates and measure their rate-distortion */ + q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); + q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + + q1_Q10 = table[q1_Q0][0]; + q2_Q10 = table[q1_Q0][1]; + + if (r_Q10 * table[q1_Q0][2] - table[q1_Q0][3] < 0) + { + q1_Q10 = q2_Q10; + } + + pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); + + /* Excitation */ + exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); + + tmp2 = -exc_Q14; + if ( NSQ->rand_seed < 0 ) exc_Q14 = tmp2; + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); + xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); + + /* Update states */ + psLPC_Q14++; + *psLPC_Q14 = xq_Q14; + sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); + + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); + sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); + NSQ->sLTP_shp_buf_idx++; + NSQ->sLTP_buf_idx++; + + /* Make dither dependent on quantized signal */ + NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); + } + + NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; + + /* Scale XQ back to normal level before saving */ + psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH ]; + + /* write back sAR2_Q14 */ + xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); + xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); + _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa ); + _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb ); + + /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */ + { + __m128i xmm_Gain_Q10; + __m128i xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, xmm_xq_Q14_7654, xmm_xq_Q14_x7x5; + + /* prepare (1 << 7) in packed 4 32-bits */ + xmm_tempa = _mm_set1_epi32( (1 << 7) ); + + /* prepare Gain_Q10 in packed 4 32-bits */ + xmm_Gain_Q10 = _mm_set1_epi32( Gain_Q10 ); + + /* process xq */ + for (i = 0; i < length - 7; i += 8) + { + xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) ); + xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) ); + + /* equal shift right 4 bytes*/ + xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + /* equal shift right 4 bytes*/ + xmm_xq_Q14_x7x5 = _mm_shuffle_epi32( xmm_xq_Q14_7654, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_xq_Q14_3210 = _mm_mul_epi32( xmm_xq_Q14_3210, xmm_Gain_Q10 ); + xmm_xq_Q14_x3x1 = _mm_mul_epi32( xmm_xq_Q14_x3x1, xmm_Gain_Q10 ); + xmm_xq_Q14_7654 = _mm_mul_epi32( xmm_xq_Q14_7654, xmm_Gain_Q10 ); + xmm_xq_Q14_x7x5 = _mm_mul_epi32( xmm_xq_Q14_x7x5, xmm_Gain_Q10 ); + + xmm_xq_Q14_3210 = _mm_srli_epi64( xmm_xq_Q14_3210, 16 ); + xmm_xq_Q14_x3x1 = _mm_slli_epi64( xmm_xq_Q14_x3x1, 16 ); + xmm_xq_Q14_7654 = _mm_srli_epi64( xmm_xq_Q14_7654, 16 ); + xmm_xq_Q14_x7x5 = _mm_slli_epi64( xmm_xq_Q14_x7x5, 16 ); + + xmm_xq_Q14_3210 = _mm_blend_epi16( xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, 0xCC ); + xmm_xq_Q14_7654 = _mm_blend_epi16( xmm_xq_Q14_7654, xmm_xq_Q14_x7x5, 0xCC ); + + /* silk_RSHIFT_ROUND(xq, 8) */ + xmm_xq_Q14_3210 = _mm_add_epi32( xmm_xq_Q14_3210, xmm_tempa ); + xmm_xq_Q14_7654 = _mm_add_epi32( xmm_xq_Q14_7654, xmm_tempa ); + + xmm_xq_Q14_3210 = _mm_srai_epi32( xmm_xq_Q14_3210, 8 ); + xmm_xq_Q14_7654 = _mm_srai_epi32( xmm_xq_Q14_7654, 8 ); + + /* silk_SAT16 */ + xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 ); + + /* save to xq */ + _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 ); + } + } + for ( ; i < length; i++) + { + xq[i] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); + } + + /* Update LPC synth buffer */ + silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); +} + +static OPUS_INLINE void silk_nsq_scale_states_sse4_1( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int32 x_Q3[], /* I input in Q3 */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ +) +{ + opus_int i, lag; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + + lag = pitchL[ subfr ]; + inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); + silk_assert( inv_gain_Q31 != 0 ); + + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + + /* Scale input */ + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + + /* prepare inv_gain_Q23 in packed 4 32-bits */ + xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + + for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { + xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + + /* equal shift right 4 bytes*/ + xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); + xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + + xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); + xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + + xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 ); + } + + for( ; i < psEncC->subfr_length; i++ ) { + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ + if( NSQ->rewhite_flag ) { + if( subfr == 0 ) { + /* Do LTP downscaling */ + inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); + } + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + silk_assert( i < MAX_FRAME_LENGTH ); + sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); + } + } + + /* Adjust for changing gain */ + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + /* Scale long-term shaping state */ + __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; + + /* prepare gain_adj_Q16 in packed 4 32-bits */ + xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16); + + for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) + { + xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); + /* equal shift right 4 bytes*/ + xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); + xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); + xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); + + xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); + + _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); + } + + for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { + NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); + } + + /* Scale long-term prediction state */ + if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); + } + } + + NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); + + /* Scale short-term prediction and shaping states */ + for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { + NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); + } + for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { + NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); + } + } +} diff --git a/media/libopus/silk/x86/SigProc_FIX_sse.h b/media/libopus/silk/x86/SigProc_FIX_sse.h new file mode 100644 index 0000000000..61efa8da41 --- /dev/null +++ b/media/libopus/silk/x86/SigProc_FIX_sse.h @@ -0,0 +1,94 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIGPROC_FIX_SSE_H +#define SIGPROC_FIX_SSE_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) +void silk_burg_modified_sse4_1( + opus_int32 *res_nrg, /* O Residual energy */ + opus_int *res_nrg_Q, /* O Residual energy Q value */ + opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ + const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ + const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ + const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I Number of subframes stacked in x */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ +); + +#if defined(OPUS_X86_PRESUME_SSE4_1) +#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ + ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) + +#else + +extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( + opus_int32 *res_nrg, /* O Residual energy */ + opus_int *res_nrg_Q, /* O Residual energy Q value */ + opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ + const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ + const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ + const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I Number of subframes stacked in x */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */); + +# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ + ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) + +#endif + +opus_int64 silk_inner_prod16_aligned_64_sse4_1( + const opus_int16 *inVec1, + const opus_int16 *inVec2, + const opus_int len +); + + +#if defined(OPUS_X86_PRESUME_SSE4_1) + +#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len)) + +#else + +extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])( + const opus_int16 *inVec1, + const opus_int16 *inVec2, + const opus_int len); + +# define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ + ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) + +#endif +#endif +#endif diff --git a/media/libopus/silk/x86/VAD_sse.c b/media/libopus/silk/x86/VAD_sse.c new file mode 100644 index 0000000000..4e90f4410d --- /dev/null +++ b/media/libopus/silk/x86/VAD_sse.c @@ -0,0 +1,277 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "main.h" +#include "stack_alloc.h" + +/* Weighting factors for tilt measure */ +static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 }; + +/***************************************/ +/* Get the speech activity level in Q8 */ +/***************************************/ +opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if success */ + silk_encoder_state *psEncC, /* I/O Encoder state */ + const opus_int16 pIn[] /* I PCM input */ +) +{ + opus_int SA_Q15, pSNR_dB_Q7, input_tilt; + opus_int decimated_framelength1, decimated_framelength2; + opus_int decimated_framelength; + opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; + opus_int32 sumSquared, smooth_coef_Q16; + opus_int16 HPstateTmp; + VARDECL( opus_int16, X ); + opus_int32 Xnrg[ VAD_N_BANDS ]; + opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; + opus_int32 speech_nrg, x_tmp; + opus_int X_offset[ VAD_N_BANDS ]; + opus_int ret = 0; + silk_VAD_state *psSilk_VAD = &psEncC->sVAD; + + SAVE_STACK; + + /* Safety checks */ + silk_assert( VAD_N_BANDS == 4 ); + silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); + silk_assert( psEncC->frame_length <= 512 ); + silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); + + /***********************/ + /* Filter and Decimate */ + /***********************/ + decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); + decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); + decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); + /* Decimate into 4 bands: + 0 L 3L L 3L 5L + - -- - -- -- + 8 8 2 4 4 + + [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz | + + They're arranged to allow the minimal ( frame_length / 4 ) extra + scratch space during the downsampling process */ + X_offset[ 0 ] = 0; + X_offset[ 1 ] = decimated_framelength + decimated_framelength2; + X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; + X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; + ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); + + /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ + silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], + X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); + + /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ + silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], + X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); + + /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ + silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], + X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); + + /*********************************************/ + /* HP filter on lowest band (differentiator) */ + /*********************************************/ + X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); + HPstateTmp = X[ decimated_framelength - 1 ]; + for( i = decimated_framelength - 1; i > 0; i-- ) { + X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); + X[ i ] -= X[ i - 1 ]; + } + X[ 0 ] -= psSilk_VAD->HPstate; + psSilk_VAD->HPstate = HPstateTmp; + + /*************************************/ + /* Calculate the energy in each band */ + /*************************************/ + for( b = 0; b < VAD_N_BANDS; b++ ) { + /* Find the decimated framelength in the non-uniformly divided bands */ + decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); + + /* Split length into subframe lengths */ + dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); + dec_subframe_offset = 0; + + /* Compute energy per sub-frame */ + /* initialize with summed energy of last subframe */ + Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; + for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { + __m128i xmm_X, xmm_acc; + sumSquared = 0; + + xmm_acc = _mm_setzero_si128(); + + for( i = 0; i < dec_subframe_length - 7; i += 8 ) + { + xmm_X = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) ); + xmm_X = _mm_srai_epi16( xmm_X, 3 ); + xmm_X = _mm_madd_epi16( xmm_X, xmm_X ); + xmm_acc = _mm_add_epi32( xmm_acc, xmm_X ); + } + + xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) ); + xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) ); + + sumSquared += _mm_cvtsi128_si32( xmm_acc ); + + for( ; i < dec_subframe_length; i++ ) { + /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ + /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ + x_tmp = silk_RSHIFT( + X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); + sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); + + /* Safety check */ + silk_assert( sumSquared >= 0 ); + } + + /* Add/saturate summed energy of current subframe */ + if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { + Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); + } else { + /* Look-ahead subframe */ + Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); + } + + dec_subframe_offset += dec_subframe_length; + } + psSilk_VAD->XnrgSubfr[ b ] = sumSquared; + } + + /********************/ + /* Noise estimation */ + /********************/ + silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); + + /***********************************************/ + /* Signal-plus-noise to noise ratio estimation */ + /***********************************************/ + sumSquared = 0; + input_tilt = 0; + for( b = 0; b < VAD_N_BANDS; b++ ) { + speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; + if( speech_nrg > 0 ) { + /* Divide, with sufficient resolution */ + if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { + NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); + } else { + NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); + } + + /* Convert to log domain */ + SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; + + /* Sum-of-squares */ + sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ + + /* Tilt measure */ + if( speech_nrg < ( (opus_int32)1 << 20 ) ) { + /* Scale down SNR value for small subband speech energies */ + SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); + } + input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); + } else { + NrgToNoiseRatio_Q8[ b ] = 256; + } + } + + /* Mean-of-squares */ + sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ + + /* Root-mean-square approximation, scale to dBs, and write to output pointer */ + pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ + + /*********************************/ + /* Speech Probability Estimation */ + /*********************************/ + SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); + + /**************************/ + /* Frequency Tilt Measure */ + /**************************/ + psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); + + /**************************************************/ + /* Scale the sigmoid output based on power levels */ + /**************************************************/ + speech_nrg = 0; + for( b = 0; b < VAD_N_BANDS; b++ ) { + /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ + speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); + } + + /* Power scaling */ + if( speech_nrg <= 0 ) { + SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); + } else if( speech_nrg < 32768 ) { + if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { + speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); + } else { + speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); + } + + /* square-root */ + speech_nrg = silk_SQRT_APPROX( speech_nrg ); + SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); + } + + /* Copy the resulting speech activity in Q8 */ + psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); + + /***********************************/ + /* Energy Level and SNR estimation */ + /***********************************/ + /* Smoothing coefficient */ + smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); + + if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { + smooth_coef_Q16 >>= 1; + } + + for( b = 0; b < VAD_N_BANDS; b++ ) { + /* compute smoothed energy-to-noise ratio per band */ + psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], + NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 ); + + /* signal to noise ratio in dB per band */ + SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 ); + /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ + psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); + } + + RESTORE_STACK; + return( ret ); +} diff --git a/media/libopus/silk/x86/VQ_WMat_EC_sse.c b/media/libopus/silk/x86/VQ_WMat_EC_sse.c new file mode 100644 index 0000000000..74d6c6d0ec --- /dev/null +++ b/media/libopus/silk/x86/VQ_WMat_EC_sse.c @@ -0,0 +1,142 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "main.h" +#include "celt/x86/x86cpu.h" + +/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ +void silk_VQ_WMat_EC_sse4_1( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +) +{ + opus_int k, gain_tmp_Q7; + const opus_int8 *cb_row_Q7; + opus_int16 diff_Q14[ 5 ]; + opus_int32 sum1_Q14, sum2_Q16; + + __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5; + /* Loop over codebook */ + *rate_dist_Q14 = silk_int32_MAX; + cb_row_Q7 = cb_Q7; + for( k = 0; k < L; k++ ) { + gain_tmp_Q7 = cb_gain_Q7[k]; + + diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); + + C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] ); + C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); + C_tmp2 = _mm_slli_epi32( C_tmp2, 7 ); + C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 ); + + diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 ); + diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 ); + diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 ); + diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 ); + + /* Weighted rate */ + sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); + + /* Penalty for too large gain */ + sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); + + silk_assert( sum1_Q14 >= 0 ); + + /* first row of W_Q18 */ + C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) ); + C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 ); + C_tmp4 = _mm_srli_si128( C_tmp4, 2 ); + + C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ + C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ + + C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 ); + C_tmp5 = _mm_srli_si128( C_tmp5, 2 ); + + C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 ); + C_tmp5 = _mm_slli_epi32( C_tmp5, 1 ); + + C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); + sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 ); + + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); + + /* second row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); + + /* third row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); + + /* fourth row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); + + /* last row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); + + silk_assert( sum1_Q14 >= 0 ); + + /* find best */ + if( sum1_Q14 < *rate_dist_Q14 ) { + *rate_dist_Q14 = sum1_Q14; + *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; + } + + /* Go to next cbk vector */ + cb_row_Q7 += LTP_ORDER; + } +} diff --git a/media/libopus/silk/x86/main_sse.h b/media/libopus/silk/x86/main_sse.h new file mode 100644 index 0000000000..afd5ec26e1 --- /dev/null +++ b/media/libopus/silk/x86/main_sse.h @@ -0,0 +1,276 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef MAIN_SSE_H +#define MAIN_SSE_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) + +# define OVERRIDE_silk_VQ_WMat_EC + +void silk_VQ_WMat_EC_sse4_1( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +); + +#if defined OPUS_X86_PRESUME_SSE4_1 + +#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L, arch) \ + ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L)) + +#else + +extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +); + +# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L, arch) \ + ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L)) + +#endif + +# define OVERRIDE_silk_NSQ + +void silk_NSQ_sse4_1( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +#if defined OPUS_X86_PRESUME_SSE4_1 + +#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) + +#else + +extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) + +#endif + +# define OVERRIDE_silk_NSQ_del_dec + +void silk_NSQ_del_dec_sse4_1( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +#if defined OPUS_X86_PRESUME_SSE4_1 + +#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) + +#else + +extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) + +#endif + +void silk_noise_shape_quantizer( + silk_nsq_state *NSQ, /* I/O NSQ state */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_sc_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP state */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ + opus_int predictLPCOrder /* I Prediction filter order */ +); + +/**************************/ +/* Noise level estimation */ +/**************************/ +void silk_VAD_GetNoiseLevels( + const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ + silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ +); + +# define OVERRIDE_silk_VAD_GetSA_Q8 + +opus_int silk_VAD_GetSA_Q8_sse4_1( + silk_encoder_state *psEnC, + const opus_int16 pIn[] +); + +#if defined(OPUS_X86_PRESUME_SSE4_1) +#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) + +#else + +# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ + ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) + +extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( + silk_encoder_state *psEnC, + const opus_int16 pIn[]); + +# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX + +#endif + +void silk_warped_LPC_analysis_filter_FIX_sse4_1( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +#if defined(OPUS_X86_PRESUME_SSE4_1) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + +#else + +extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + +#endif + +# endif +#endif diff --git a/media/libopus/silk/x86/x86_silk_map.c b/media/libopus/silk/x86/x86_silk_map.c new file mode 100644 index 0000000000..818841f2c1 --- /dev/null +++ b/media/libopus/silk/x86/x86_silk_map.c @@ -0,0 +1,174 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if defined(HAVE_CONFIG_H) +#include "config.h" +#endif + +#include "celt/x86/x86cpu.h" +#include "structs.h" +#include "SigProc_FIX.h" +#include "pitch.h" +#include "main.h" + +#if !defined(OPUS_X86_PRESUME_SSE4_1) + +#if defined(FIXED_POINT) + +#include "fixed/main_FIX.h" + +opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( + const opus_int16 *inVec1, + const opus_int16 *inVec2, + const opus_int len +) = { + silk_inner_prod16_aligned_64_c, /* non-sse */ + silk_inner_prod16_aligned_64_c, + silk_inner_prod16_aligned_64_c, + MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ +}; + +#endif + +opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( + silk_encoder_state *psEncC, + const opus_int16 pIn[] +) = { + silk_VAD_GetSA_Q8_c, /* non-sse */ + silk_VAD_GetSA_Q8_c, + silk_VAD_GetSA_Q8_c, + MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ +}; + +void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +) = { + silk_NSQ_c, /* non-sse */ + silk_NSQ_c, + silk_NSQ_c, + MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ +}; + +void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +) = { + silk_VQ_WMat_EC_c, /* non-sse */ + silk_VQ_WMat_EC_c, + silk_VQ_WMat_EC_c, + MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ +}; + +void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +) = { + silk_NSQ_del_dec_c, /* non-sse */ + silk_NSQ_del_dec_c, + silk_NSQ_del_dec_c, + MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ +}; + +#if defined(FIXED_POINT) + +void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) = { + silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */ + silk_warped_LPC_analysis_filter_FIX_c, + silk_warped_LPC_analysis_filter_FIX_c, + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ +}; + +void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( + opus_int32 *res_nrg, /* O Residual energy */ + opus_int *res_nrg_Q, /* O Residual energy Q value */ + opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ + const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ + const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ + const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I Number of subframes stacked in x */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ +) = { + silk_burg_modified_c, /* non-sse */ + silk_burg_modified_c, + silk_burg_modified_c, + MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ +}; + +#endif +#endif diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild index 4081418221..dfc2c62e83 100644 --- a/media/libopus/sources.mozbuild +++ b/media/libopus/sources.mozbuild @@ -18,11 +18,28 @@ celt_sources = [ 'celt/vq.c', ] -celt_nonunified_sources = [ +opus_nonunified_sources = [ # Disabled because of name clash of opus_custom_encoder_get_size. 'celt/celt_decoder.c', - # Disabled because of name clash of opus_custom_encoder_get_size. 'celt/celt_encoder.c', + # Disabled for (safe) warning about QA redefinition. + 'silk/LPC_inv_pred_gain.c', + 'silk/NLSF2A.c', +] + +celt_sources_sse = [ + 'celt/x86/pitch_sse.c', + 'celt/x86/x86_celt_map.c', + 'celt/x86/x86cpu.c', +] + +celt_sources_sse2 = [ + 'celt/x86/pitch_sse2.c', +] + +celt_sources_sse4_1 = [ + 'celt/x86/celt_lpc_sse.c', + 'celt/x86/pitch_sse4_1.c', ] celt_sources_arm = [ @@ -38,6 +55,13 @@ celt_am_sources_arm_asm = [ 'celt/arm/armopts.s.in', ] +celt_sources_arm_neon_intr = [ + 'celt/arm/celt_ne10_fft.c', + 'celt/arm/celt_ne10_mdct.c', + 'celt/arm/celt_neon_intr.c', + 'CELT_SOURCES_ARM_NE10=', +] + opus_sources = [ 'src/opus.c', 'src/opus_decoder.c', @@ -88,8 +112,6 @@ silk_sources = [ 'silk/log2lin.c', 'silk/LP_variable_cutoff.c', 'silk/LPC_analysis_filter.c', - 'silk/LPC_inv_pred_gain.c', - 'silk/NLSF2A.c', 'silk/NLSF_decode.c', 'silk/NLSF_del_dec_quant.c', 'silk/NLSF_encode.c', @@ -133,6 +155,14 @@ silk_sources = [ 'silk/VQ_WMat_EC.c', ] +silk_sources_sse4_1 = [ + 'silk/x86/NSQ_del_dec_sse.c', + 'silk/x86/NSQ_sse.c', + 'silk/x86/VAD_sse.c', + 'silk/x86/VQ_WMat_EC_sse.c', + 'silk/x86/x86_silk_map.c', +] + silk_sources_fixed = [ 'silk/fixed/apply_sine_window_FIX.c', 'silk/fixed/autocorr_FIX.c', @@ -161,6 +191,12 @@ silk_sources_fixed = [ 'silk/fixed/warped_autocorrelation_FIX.c', ] +silk_sources_fixed_sse4_1 = [ + 'silk/fixed/x86/burg_modified_FIX_sse.c', + 'silk/fixed/x86/prefilter_FIX_sse.c', + 'silk/fixed/x86/vector_ops_FIX_sse.c', +] + silk_sources_float = [ 'silk/float/apply_sine_window_FLP.c', 'silk/float/autocorrelation_FLP.c', diff --git a/media/libopus/src/analysis.c b/media/libopus/src/analysis.c index 778a62aabf..360ebcc8dd 100644 --- a/media/libopus/src/analysis.c +++ b/media/libopus/src/analysis.c @@ -39,8 +39,6 @@ #include "mlp.h" #include "stack_alloc.h" -extern const MLP net; - #ifndef M_PI #define M_PI 3.141592653 #endif @@ -140,6 +138,21 @@ static OPUS_INLINE float fast_atan2f(float y, float x) { } } +void tonality_analysis_init(TonalityAnalysisState *tonal) +{ + /* Initialize reusable fields. */ + tonal->arch = opus_select_arch(); + /* Clear remaining fields. */ + tonality_analysis_reset(tonal); +} + +void tonality_analysis_reset(TonalityAnalysisState *tonal) +{ + /* Clear non-reusable fields. */ + char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START; + OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal)); +} + void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) { int pos; @@ -189,7 +202,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int info_out->music_prob = psum; } -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; const kiss_fft_state *kfft; @@ -262,7 +275,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out); + opus_fft(kfft, in, out, tonal->arch); +#ifndef FIXED_POINT + /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ + if (celt_isnan(out[0].r)) + { + info->valid = 0; + RESTORE_STACK; + return; + } +#endif for (i=1;ivalid = 0; + RESTORE_STACK; + return; + } +#endif + tonal->E[tonal->E_count][b] = E; frame_noisiness += nE/(1e-15f+E); @@ -611,8 +643,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ info->noisiness = frame_noisiness; info->valid = 1; - if (info_out!=NULL) - OPUS_COPY(info_out, info, 1); RESTORE_STACK; } @@ -631,7 +661,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; do { - tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); offset += 480; pcm_len -= 480; } while (pcm_len>0); diff --git a/media/libopus/src/analysis.h b/media/libopus/src/analysis.h index be0388faa3..9eae56a525 100644 --- a/media/libopus/src/analysis.h +++ b/media/libopus/src/analysis.h @@ -39,6 +39,8 @@ #define DETECT_SIZE 200 typedef struct { + int arch; +#define TONALITY_ANALYSIS_RESET_START angle float angle[240]; float d_angle[240]; float d2_angle[240]; @@ -78,8 +80,19 @@ typedef struct { AnalysisInfo info[DETECT_SIZE]; } TonalityAnalysisState; -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, - const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix); +/** Initialize a TonalityAnalysisState struct. + * + * This performs some possibly slow initialization steps which should + * not be repeated every analysis step. No allocated memory is retained + * by the state struct, so no cleanup call is required. + */ +void tonality_analysis_init(TonalityAnalysisState *analysis); + +/** Reset a TonalityAnalysisState stuct. + * + * Call this when there's a discontinuity in the data. + */ +void tonality_analysis_reset(TonalityAnalysisState *analysis); void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); diff --git a/media/libopus/src/mlp.c b/media/libopus/src/mlp.c index 4638602667..ff9e50df47 100644 --- a/media/libopus/src/mlp.c +++ b/media/libopus/src/mlp.c @@ -41,77 +41,82 @@ #if 0 static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ { - int i; - opus_val16 xx; /* Q11 */ - /*double x, y;*/ - opus_val16 dy, yy; /* Q14 */ - /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(8,19)) - return QCONST32(1.,14); - if (_x<=-QCONST32(8,19)) - return -QCONST32(1.,14); - xx = EXTRACT16(SHR32(_x, 8)); - /*i = lrint(25*x);*/ - i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); - /*x -= .04*i;*/ - xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); - /*x = xx*(1./2048);*/ - /*y = tansig_table[250+i];*/ - yy = tansig_table[250+i]; - /*y = yy*(1./16384);*/ - dy = 16384-MULT16_16_Q14(yy,yy); - yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); - return yy; + int i; + opus_val16 xx; /* Q11 */ + /*double x, y;*/ + opus_val16 dy, yy; /* Q14 */ + /*x = 1.9073e-06*_x;*/ + if (_x>=QCONST32(8,19)) + return QCONST32(1.,14); + if (_x<=-QCONST32(8,19)) + return -QCONST32(1.,14); + xx = EXTRACT16(SHR32(_x, 8)); + /*i = lrint(25*x);*/ + i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); + /*x -= .04*i;*/ + xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); + /*x = xx*(1./2048);*/ + /*y = tansig_table[250+i];*/ + yy = tansig_table[250+i]; + /*y = yy*(1./16384);*/ + dy = 16384-MULT16_16_Q14(yy,yy); + yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); + return yy; } #else /*extern const float tansig_table[501];*/ static OPUS_INLINE float tansig_approx(float x) { - int i; - float y, dy; - float sign=1; - /* Tests are reversed to catch NaNs */ + int i; + float y, dy; + float sign=1; + /* Tests are reversed to catch NaNs */ if (!(x<8)) return 1; if (!(x>-8)) return -1; - if (x<0) - { - x=-x; - sign=-1; - } - i = (int)floor(.5f+25*x); - x -= .04f*i; - y = tansig_table[i]; - dy = 1-y*y; - y = y + x*dy*(1 - y*x); - return sign*y; +#ifndef FIXED_POINT + /* Another check in case of -ffast-math */ + if (celt_isnan(x)) + return 0; +#endif + if (x<0) + { + x=-x; + sign=-1; + } + i = (int)floor(.5f+25*x); + x -= .04f*i; + y = tansig_table[i]; + dy = 1-y*y; + y = y + x*dy*(1 - y*x); + return sign*y; } #endif #if 0 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) { - int j; - opus_val16 hidden[MAX_NEURONS]; - const opus_val16 *W = m->weights; - /* Copy to tmp_in */ - for (j=0;jtopo[1];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),8); - for (k=0;ktopo[0];k++) - sum = MAC16_16(sum, in[k],*W++); - hidden[j] = tansig_approx(sum); - } - for (j=0;jtopo[2];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),14); - for (k=0;ktopo[1];k++) - sum = MAC16_16(sum, hidden[k], *W++); - out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); - } + int j; + opus_val16 hidden[MAX_NEURONS]; + const opus_val16 *W = m->weights; + /* Copy to tmp_in */ + for (j=0;jtopo[1];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),8); + for (k=0;ktopo[0];k++) + sum = MAC16_16(sum, in[k],*W++); + hidden[j] = tansig_approx(sum); + } + for (j=0;jtopo[2];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),14); + for (k=0;ktopo[1];k++) + sum = MAC16_16(sum, hidden[k], *W++); + out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); + } } #else void mlp_process(const MLP *m, const float *in, float *out) diff --git a/media/libopus/src/mlp.h b/media/libopus/src/mlp.h index 86c8e0617d..618e246e2c 100644 --- a/media/libopus/src/mlp.h +++ b/media/libopus/src/mlp.h @@ -31,11 +31,13 @@ #include "arch.h" typedef struct { - int layers; - const int *topo; - const float *weights; + int layers; + const int *topo; + const float *weights; } MLP; +extern const MLP net; + void mlp_process(const MLP *m, const float *in, float *out); #endif /* _MLP_H_ */ diff --git a/media/libopus/src/mlp_data.c b/media/libopus/src/mlp_data.c index 401c4c0250..c2fda4e2e5 100644 --- a/media/libopus/src/mlp_data.c +++ b/media/libopus/src/mlp_data.c @@ -1,6 +1,10 @@ /* The contents of this file was automatically generated by mlp_train.c It contains multi-layer perceptron (MLP) weights. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include "mlp.h" /* RMS error was 0.138320, seed was 1361535663 */ diff --git a/media/libopus/src/opus.c b/media/libopus/src/opus.c index 30890b9cbf..e9ce93b308 100644 --- a/media/libopus/src/opus.c +++ b/media/libopus/src/opus.c @@ -166,6 +166,27 @@ static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *siz } } +int opus_packet_get_samples_per_frame(const unsigned char *data, + opus_int32 Fs) +{ + int audiosize; + if (data[0]&0x80) + { + audiosize = ((data[0]>>3)&0x3); + audiosize = (Fs<>3)&0x3); + if (audiosize == 3) + audiosize = Fs*60/1000; + else + audiosize = (Fs<= 2) && !defined(__OPTIMIZE__) +#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW) # pragma message "You appear to be compiling without optimization, if so opus will be very slow." #endif @@ -59,6 +59,7 @@ struct OpusDecoder { opus_int32 Fs; /** Sampling rate (at the API level) */ silk_DecControlStruct DecControl; int decode_gain; + int arch; /* Everything beyond this point gets cleared on a reset */ #define OPUS_DECODER_RESET_START stream_channels @@ -77,12 +78,6 @@ struct OpusDecoder { opus_uint32 rangeFinal; }; -#ifdef FIXED_POINT -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} -#endif - int opus_decoder_get_size(int channels) { @@ -137,6 +132,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) st->prev_mode = 0; st->frame_size = Fs/400; + st->arch = opus_select_arch(); return OPUS_OK; } @@ -215,7 +211,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, VARDECL(opus_val16, pcm_transition_silk); int pcm_transition_celt_size; VARDECL(opus_val16, pcm_transition_celt); - opus_val16 *pcm_transition; + opus_val16 *pcm_transition=NULL; int redundant_audio_size; VARDECL(opus_val16, redundant_audio); @@ -230,6 +226,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int F2_5, F5, F10, F20; const opus_val16 *window; opus_uint32 redundant_rng = 0; + int celt_accum; ALLOC_STACK; silk_dec = (char*)st+st->silk_dec_offset; @@ -295,6 +292,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } + /* In fixed-point, we can tell CELT to do the accumulation on top of the + SILK PCM buffer. This saves some stack space. */ +#ifdef FIXED_POINT + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); +#else + celt_accum = 0; +#endif + pcm_transition_silk_size = ALLOC_NONE; pcm_transition_celt_size = ALLOC_NONE; if (data!=NULL && st->prev_mode > 0 && ( @@ -325,14 +330,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) { int lost_flag, decoded_samples; - opus_int16 *pcm_ptr = pcm_silk; + opus_int16 *pcm_ptr; +#ifdef FIXED_POINT + if (celt_accum) + pcm_ptr = pcm; + else +#endif + pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); @@ -366,7 +377,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, /* Call SILK decoder */ int first_frame = decoded_samples == 0; silk_ret = silk_Decode( silk_dec, &st->DecControl, - lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size ); + lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch ); if( silk_ret ) { if (lost_flag) { /* PLC failure should not be fatal */ @@ -462,7 +473,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL); + redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } @@ -477,25 +488,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, - len, pcm, celt_frame_size, &dec); + len, pcm, celt_frame_size, &dec, celt_accum); } else { unsigned char silence[2] = {0xFF, 0xFF}; - for (i=0;ichannels;i++) - pcm[i] = 0; + if (!celt_accum) + { + for (i=0;ichannels;i++) + pcm[i] = 0; + } /* For hybrid -> SILK transitions, we let the CELT MDCT do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } - if (mode != MODE_CELT_ONLY) + if (mode != MODE_CELT_ONLY && !celt_accum) { #ifdef FIXED_POINT for (i=0;ichannels;i++) - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); + pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); #else for (i=0;ichannels;i++) pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); @@ -514,7 +528,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); @@ -710,6 +724,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, { VARDECL(opus_int16, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -717,6 +732,14 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, RESTORE_STACK; return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, opus_int16); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); @@ -737,6 +760,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, { VARDECL(float, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -745,6 +769,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, float); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); @@ -904,27 +936,6 @@ int opus_packet_get_bandwidth(const unsigned char *data) return bandwidth; } -int opus_packet_get_samples_per_frame(const unsigned char *data, - opus_int32 Fs) -{ - int audiosize; - if (data[0]&0x80) - { - audiosize = ((data[0]>>3)&0x3); - audiosize = (Fs<>3)&0x3); - if (audiosize == 3) - audiosize = Fs*60/1000; - else - audiosize = (Fs<lsb_depth = 24; st->variable_duration = OPUS_FRAMESIZE_ARG; - /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead + /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead + 1.5 ms for SILK resamplers and stereo prediction) */ st->delay_compensation = st->Fs/250; @@ -242,6 +244,10 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->mode = MODE_HYBRID; st->bandwidth = OPUS_BANDWIDTH_FULLBAND; +#ifndef DISABLE_FLOAT_API + tonality_analysis_init(&st->analysis); +#endif + return OPUS_OK; } @@ -648,7 +654,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ return best_state; } -int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, +static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, float *mem, int buffering, downmix_func downmix) { @@ -660,6 +666,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bestLM=0; int subframe; int pos; + int offset; VARDECL(opus_val32, sub); subframe = Fs/400; @@ -670,9 +677,8 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, { /* Consider the CELT delay when not in restricted-lowdelay */ /* We assume the buffering is between 2.5 and 5 ms */ - int offset = 2*subframe - buffering; + offset = 2*subframe - buffering; celt_assert(offset>=0 && offset <= subframe); - x += C*offset; len -= offset; e[1]=mem[1]; e_1[1]=1.f/(EPSILON+mem[1]); @@ -681,6 +687,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, pos = 3; } else { pos=1; + offset=0; } N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); /* Just silencing a warning, it's really initialized later */ @@ -692,7 +699,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int j; tmp=EPSILON; - downmix(x, sub, subframe, i*subframe, 0, -2, C); + downmix(x, sub, subframe, i*subframe+offset, 0, -2, C); if (i==0) memx = sub[0]; for (j=0;jlsb_depth); - analysis_info.valid = 0; celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); #ifndef DISABLE_FLOAT_API + analysis_info.valid = 0; #ifdef FIXED_POINT if (st->silk_mode.complexity >= 10 && st->Fs==48000) #else @@ -997,6 +1013,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); } +#else + (void)analysis_pcm; + (void)analysis_size; #endif st->voice_ratio = -1; @@ -1377,7 +1396,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->user_forced_mode = MODE_CELT_ONLY; tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - NULL, 0, c1, c2, analysis_channels, downmix); + NULL, 0, c1, c2, analysis_channels, downmix, float_api); if (tmp_len<0) { RESTORE_STACK; @@ -1424,8 +1443,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_init(&enc, data, max_data_bytes-1); ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); - for (i=0;ichannels;i++) - pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; + OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels); if (st->mode == MODE_CELT_ONLY) hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); @@ -1444,7 +1462,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } - +#ifndef FIXED_POINT + if (float_api) + { + opus_val32 sum; + sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch); + /* This should filter out both NaNs and ridiculous signals that could + cause NaNs further down. */ + if (!(sum < 1e9f) || celt_isnan(sum)) + { + OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); + st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0; + } + } +#endif /* SILK processing */ @@ -1599,8 +1630,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); - for(i=0;idelay_buffer[i]=0; + OPUS_CLEAR(st->delay_buffer, prefill_offset); #ifdef FIXED_POINT pcm_silk = st->delay_buffer; #else @@ -1727,15 +1757,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) { - for (i=0;ichannels*st->Fs/400;i++) - tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; + OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400); } - for (i=0;ichannels*(st->encoder_buffer-(frame_size+total_buffer));i++) - st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; - for (;iencoder_buffer*st->channels;i++) - st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; - + if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0) + { + OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer)); + OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)], + &pcm_buf[0], + (frame_size+total_buffer)*st->channels); + } else { + OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels); + } /* gain_fade() and stereo_fade() need to be after the buffer copying because we don't want any of this to affect the SILK part */ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { @@ -1955,7 +1988,8 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra for (i=0;ichannels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); RESTORE_STACK; return ret; } @@ -1977,7 +2011,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram , st->analysis.subframe_mem #endif ); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); } #else @@ -2002,7 +2037,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram for (i=0;ichannels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); RESTORE_STACK; return ret; } @@ -2019,7 +2055,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, downmix_float, st->analysis.subframe_mem); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); } #endif @@ -2108,7 +2144,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) case OPUS_SET_MAX_BANDWIDTH_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) + if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) { goto bad_arg; } @@ -2418,11 +2454,14 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { void *silk_enc; silk_EncControlStruct dummy; + char *start; silk_enc = (char*)st+st->silk_enc_offset; +#ifndef DISABLE_FLOAT_API + tonality_analysis_reset(&st->analysis); +#endif - OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START, - sizeof(OpusEncoder)- - ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st)); + start = (char*)&st->OPUS_ENCODER_RESET_START; + OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); silk_InitEncoder( silk_enc, st->arch, &dummy ); diff --git a/media/libopus/src/opus_multistream_decoder.c b/media/libopus/src/opus_multistream_decoder.c index a05fa1e765..b95eaa6eac 100644 --- a/media/libopus/src/opus_multistream_decoder.c +++ b/media/libopus/src/opus_multistream_decoder.c @@ -75,7 +75,7 @@ int opus_multistream_decoder_init( char *ptr; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) return OPUS_BAD_ARG; st->layout.nb_channels = channels; @@ -119,7 +119,7 @@ OpusMSDecoder *opus_multistream_decoder_create( int ret; OpusMSDecoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) { if (error) *error = OPUS_BAD_ARG; diff --git a/media/libopus/src/opus_multistream_encoder.c b/media/libopus/src/opus_multistream_encoder.c index 49e27913ee..9e85773573 100644 --- a/media/libopus/src/opus_multistream_encoder.c +++ b/media/libopus/src/opus_multistream_encoder.c @@ -41,6 +41,7 @@ #include "modes.h" #include "bands.h" #include "quant_bands.h" +#include "pitch.h" typedef struct { int nb_streams; @@ -71,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)( struct OpusMSEncoder { ChannelLayout layout; + int arch; int lfe_stream; int application; int variable_duration; @@ -98,7 +100,8 @@ static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) else ptr += align(mono_size); } - return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); + /* void* cast avoids clang -Wcast-align warning */ + return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); } static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) @@ -117,7 +120,8 @@ static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) else ptr += align(mono_size); } - return (opus_val32*)ptr; + /* void* cast avoids clang -Wcast-align warning */ + return (opus_val32*)(void*)ptr; } static int validate_encoder_layout(const ChannelLayout *layout) @@ -199,7 +203,7 @@ static opus_val16 logSum(opus_val16 a, opus_val16 b) max = b; diff = SUB32(EXTEND32(b),EXTEND32(a)); } - if (diff >= QCONST16(8.f, DB_SHIFT)) + if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ return max; #ifdef FIXED_POINT low = SHR32(diff, DB_SHIFT-1); @@ -218,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b) #endif void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch ) { int c; @@ -257,7 +261,21 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b OPUS_COPY(in, mem+c*overlap, overlap); (*copy_channel_in)(x, 1, pcm, channels, c, len); celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); +#ifndef FIXED_POINT + { + opus_val32 sum; + sum = celt_inner_prod(in, in, frame_size+overlap, 0); + /* This should filter out both NaNs and ridiculous signals that could + cause NaNs further down. */ + if (!(sum < 1e9f) || celt_isnan(sum)) + { + OPUS_CLEAR(in, frame_size+overlap); + preemph_mem[c] = 0; + } + } +#endif + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, + overlap, celt_mode->maxLM-LM, 1, arch); if (upsample != 1) { int bound = len; @@ -267,7 +285,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b freq[i] = 0; } - compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) return OPUS_BAD_ARG; + st->arch = opus_select_arch(); st->layout.nb_channels = channels; st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; @@ -530,7 +549,7 @@ OpusMSEncoder *opus_multistream_encoder_create( int ret; OpusMSEncoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) { if (error) *error = OPUS_BAD_ARG; @@ -566,6 +585,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( ) { int ret; + opus_int32 size; OpusMSEncoder *st; if ((channels>255) || (channels<1)) { @@ -573,7 +593,14 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( *error = OPUS_BAD_ARG; return NULL; } - st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family)); + size = opus_multistream_surround_encoder_get_size(channels, mapping_family); + if (!size) + { + if (error) + *error = OPUS_UNIMPLEMENTED; + return NULL; + } + st = (OpusMSEncoder *)opus_alloc(size); if (st==NULL) { if (error) @@ -591,7 +618,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( return st; } -static void surround_rate_allocation( +static opus_int32 surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, int frame_size @@ -605,6 +632,7 @@ static void surround_rate_allocation( int lfe_offset; int coupled_ratio; /* Q8 */ int lfe_ratio; /* Q8 */ + opus_int32 rate_sum=0; ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); @@ -660,7 +688,10 @@ static void surround_rate_allocation( rate[i] = stream_offset+channel_rate; else rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); + rate[i] = IMAX(rate[i], 500); + rate_sum += rate[i]; } + return rate_sum; } /* Max size in case the encoder decides to return three frames */ @@ -674,7 +705,8 @@ static int opus_multistream_encode_native unsigned char *data, opus_int32 max_data_bytes, int lsb_depth, - downmix_func downmix + downmix_func downmix, + int float_api ) { opus_int32 Fs; @@ -694,6 +726,8 @@ static int opus_multistream_encode_native opus_val32 *mem = NULL; opus_val32 *preemph_mem=NULL; int frame_size; + opus_int32 rate_sum; + opus_int32 smallest_packet; ALLOC_STACK; if (st->surround) @@ -737,6 +771,14 @@ static int opus_multistream_encode_native RESTORE_STACK; return OPUS_BAD_ARG; } + + /* Smallest packet the encoder can produce. */ + smallest_packet = st->layout.nb_streams*2-1; + if (max_data_bytes < smallest_packet) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } ALLOC(buf, 2*frame_size, opus_val16); coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); @@ -744,21 +786,23 @@ static int opus_multistream_encode_native ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); if (st->surround) { - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); - } - - if (max_data_bytes < 4*st->layout.nb_streams-1) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch); } /* Compute bitrate allocation between streams (this could be a lot better) */ - surround_rate_allocation(st, bitrates, frame_size); + rate_sum = surround_rate_allocation(st, bitrates, frame_size); if (!vbr) - max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size)); - + { + if (st->bitrate_bps == OPUS_AUTO) + { + max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size)); + } else if (st->bitrate_bps != OPUS_BITRATE_MAX) + { + max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet, + 3*st->bitrate_bps/(3*8*Fs/frame_size))); + } + } ptr = (char*)st + align(sizeof(OpusMSEncoder)); for (s=0;slayout.nb_streams;s++) { @@ -843,13 +887,15 @@ static int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; - /* Reserve three bytes for the last stream and four for the others */ - curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); + /* Reserve one byte for the last stream and two for the others */ + curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1); curr_max = IMIN(curr_max,MS_FRAME_TMP); + /* Repacketizer will add one or two bytes for self-delimited frames */ + if (s != st->layout.nb_streams-1) curr_max -= curr_max>253 ? 2 : 1; if (!vbr && s == st->layout.nb_streams-1) opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, - pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api); if (len<0) { RESTORE_STACK; @@ -922,7 +968,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #ifndef DISABLE_FLOAT_API @@ -935,7 +981,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, downmix_float); + pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1); } #endif @@ -951,7 +997,7 @@ int opus_multistream_encode_float ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float); + pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1); } int opus_multistream_encode( @@ -963,7 +1009,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #endif diff --git a/media/libopus/src/opus_private.h b/media/libopus/src/opus_private.h index 83225f2b6c..3b62eed096 100644 --- a/media/libopus/src/opus_private.h +++ b/media/libopus/src/opus_private.h @@ -33,6 +33,8 @@ #include "opus.h" #include "celt.h" +#include /* offsetof */ + struct OpusRepacketizer { unsigned char toc; int nb_frames; @@ -86,10 +88,6 @@ typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix); - int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); @@ -104,16 +102,23 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, + int analysis_channels, downmix_func downmix, int float_api); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, opus_int32 *packet_offset, int soft_clip); -/* Make sure everything's aligned to sizeof(void *) bytes */ +/* Make sure everything is properly aligned. */ static OPUS_INLINE int align(int i) { - return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *); + struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;}; + + unsigned int alignment = offsetof(struct foo, u); + + /* Optimizing compilers should optimize div and multiply into and + for all sensible alignment values. */ + return ((i + alignment - 1) / alignment) * alignment; } int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, diff --git a/media/libopus/src/repacketizer.c b/media/libopus/src/repacketizer.c index a62675ce94..f27e9ab958 100644 --- a/media/libopus/src/repacketizer.c +++ b/media/libopus/src/repacketizer.c @@ -219,8 +219,9 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int } if (pad) { - for (i=ptr-data;imi; + const MODE_INFO *mode_info_context = cm->show_frame_mi; int mbr, mbc; /* The pixel thresholds are adjusted according to if or not the macroblock diff --git a/media/libyuv/source/compare_win.cc b/media/libyuv/source/compare_win.cc index 99831651f5..522e2989d2 100644 --- a/media/libyuv/source/compare_win.cc +++ b/media/libyuv/source/compare_win.cc @@ -18,7 +18,7 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) -__declspec(naked) __declspec(align(16)) +__declspec(naked) uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { __asm { mov eax, [esp + 4] // src_a @@ -60,7 +60,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { #if _MSC_VER >= 1700 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. #pragma warning(disable: 4752) -__declspec(naked) __declspec(align(16)) +__declspec(naked) uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { __asm { mov eax, [esp + 4] // src_a @@ -135,7 +135,7 @@ static uvec32 kHashMul3 = { #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ _asm _emit 0x40 _asm _emit reg -__declspec(naked) __declspec(align(16)) +__declspec(naked) uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { __asm { mov eax, [esp + 4] // src @@ -187,7 +187,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { // Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 -__declspec(naked) __declspec(align(16)) +__declspec(naked) uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { __asm { mov eax, [esp + 4] // src diff --git a/media/libyuv/source/rotate.cc b/media/libyuv/source/rotate.cc index b052ac1dc4..d130c2db93 100644 --- a/media/libyuv/source/rotate.cc +++ b/media/libyuv/source/rotate.cc @@ -76,7 +76,7 @@ void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, #if !defined(LIBYUV_DISABLE_X86) && \ defined(_M_IX86) && defined(_MSC_VER) #define HAS_TRANSPOSE_WX8_SSSE3 -__declspec(naked) __declspec(align(16)) +__declspec(naked) static void TransposeWx8_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { __asm { @@ -168,7 +168,7 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride, } #define HAS_TRANSPOSE_UVWX8_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, diff --git a/media/libyuv/source/row_win.cc b/media/libyuv/source/row_win.cc index f13e4d7ae5..ecf08b32cc 100644 --- a/media/libyuv/source/row_win.cc +++ b/media/libyuv/source/row_win.cc @@ -144,7 +144,7 @@ static const uvec8 kShuffleMaskARGBToRAW_0 = { }; // Duplicates gray value 3 times and fills in alpha opaque. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { __asm { mov eax, [esp + 4] // src_y @@ -172,7 +172,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { __asm { @@ -201,7 +201,7 @@ void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { __asm { mov eax, [esp + 4] // src_rgb24 @@ -240,7 +240,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { __asm { @@ -287,7 +287,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 // 20 instructions. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix) { __asm { @@ -338,7 +338,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, } // 24 instructions -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, int pix) { __asm { @@ -392,7 +392,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, } // 18 instructions. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, int pix) { __asm { @@ -431,7 +431,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { mov eax, [esp + 4] // src_argb @@ -470,7 +470,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { mov eax, [esp + 4] // src_argb @@ -509,7 +509,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { mov eax, [esp + 4] // src_argb @@ -548,7 +548,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { } // TODO(fbarchard): Improve sign extension/packing. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { mov eax, [esp + 4] // src_argb @@ -590,7 +590,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { mov eax, [esp + 4] // src_argb @@ -621,7 +621,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { } // Convert 16 ARGB pixels (64 bytes) to 16 Y values. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -656,7 +656,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } // Convert 16 ARGB pixels (64 bytes) to 16 Y values. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -693,7 +693,7 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { #ifdef HAS_ARGBTOYROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -__declspec(naked) __declspec(align(32)) +__declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -733,7 +733,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { #ifdef HAS_ARGBTOYROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -__declspec(naked) __declspec(align(32)) +__declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -773,7 +773,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { } #endif // HAS_ARGBTOYJROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -807,7 +807,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -842,7 +842,7 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -876,7 +876,7 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -910,7 +910,7 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -944,7 +944,7 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -978,7 +978,7 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1012,7 +1012,7 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1046,7 +1046,7 @@ void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1112,7 +1112,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1180,7 +1180,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } #ifdef HAS_ARGBTOUVROW_AVX2 -__declspec(naked) __declspec(align(32)) +__declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1246,7 +1246,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, } #endif // HAS_ARGBTOUVROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1316,7 +1316,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1387,7 +1387,7 @@ void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1445,7 +1445,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1503,7 +1503,7 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUV422Row_SSSE3(const uint8* src_argb0, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1562,7 +1562,7 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1621,7 +1621,7 @@ void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1687,7 +1687,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1757,7 +1757,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1823,7 +1823,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1893,7 +1893,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -1959,7 +1959,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -2077,7 +2077,7 @@ static const lvec16 kUVBiasR_AVX = { // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToARGBRow_AVX2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2276,7 +2276,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; // 8 pixels, dest aligned 16. // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2319,7 +2319,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2366,7 +2366,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2413,7 +2413,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf, // 8 pixels, dest unaligned. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToRGB565Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2486,7 +2486,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2530,7 +2530,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // Similar to I420 but duplicate UV once more. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2575,7 +2575,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, @@ -2613,7 +2613,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void NV21ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, @@ -2651,7 +2651,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels, unaligned. // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2694,7 +2694,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, // 8 pixels, unaligned. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2738,7 +2738,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, // 8 pixels, unaligned. // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // Similar to I420 but duplicate UV once more. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2783,7 +2783,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, @@ -2821,7 +2821,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) +__declspec(naked) void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, @@ -2857,7 +2857,7 @@ void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2898,7 +2898,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2939,7 +2939,7 @@ void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2980,7 +2980,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -3021,7 +3021,7 @@ void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -3062,7 +3062,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -3106,7 +3106,7 @@ void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, #endif // HAS_I422TOARGBROW_SSSE3 #ifdef HAS_YTOARGBROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { @@ -3159,7 +3159,7 @@ static const uvec8 kShuffleMirror = { 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3188,7 +3188,7 @@ static const ulvec8 kShuffleMirror_AVX2 = { 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3215,7 +3215,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROW_SSE2 // SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3 // version can not. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3248,7 +3248,7 @@ static const uvec8 kShuffleMirrorUV = { 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { __asm { @@ -3284,7 +3284,7 @@ static const uvec8 kARGBShuffleMirror = { 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3313,7 +3313,7 @@ static const ulvec32 kARGBShuffleMirror_AVX2 = { 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3336,7 +3336,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { __asm { push edi @@ -3372,7 +3372,7 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -3411,7 +3411,7 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { __asm { push edi @@ -3450,7 +3450,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) { __asm { @@ -3480,7 +3480,7 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) { __asm { @@ -3512,7 +3512,7 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, #endif // HAS_MERGEUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) { __asm { @@ -3547,7 +3547,7 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, #ifdef HAS_COPYROW_SSE2 // CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { __asm { mov eax, [esp + 4] // src @@ -3570,7 +3570,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { #endif // HAS_COPYROW_SSE2 // Unaligned Multiple of 1. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { __asm { mov eax, esi @@ -3586,7 +3586,7 @@ void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { } #ifdef HAS_COPYROW_X86 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void CopyRow_X86(const uint8* src, uint8* dst, int count) { __asm { mov eax, esi @@ -3605,7 +3605,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) { #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3642,7 +3642,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3672,7 +3672,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3711,7 +3711,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3743,7 +3743,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_SETROW_X86 // SetRow8 writes 'count' bytes using a 32 bit value repeated. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SetRow_X86(uint8* dst, uint32 v32, int count) { __asm { mov edx, edi @@ -3758,7 +3758,7 @@ void SetRow_X86(uint8* dst, uint32 v32, int count) { } // SetRow32 writes 'count' words using a 32 bit value repeated. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, int dst_stride, int height) { __asm { @@ -3790,7 +3790,7 @@ void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) { __asm { @@ -3818,7 +3818,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -3863,7 +3863,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -3903,7 +3903,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix) { __asm { @@ -3929,7 +3929,7 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -3974,7 +3974,7 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4016,7 +4016,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, #endif // HAS_YUY2TOYROW_AVX2 #ifdef HAS_YUY2TOYROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { __asm { @@ -4042,7 +4042,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4086,7 +4086,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4123,7 +4123,7 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { __asm { @@ -4149,7 +4149,7 @@ void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4193,7 +4193,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4230,7 +4230,7 @@ void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { __asm { @@ -4254,7 +4254,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4298,7 +4298,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4335,7 +4335,7 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { __asm { @@ -4359,7 +4359,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4403,7 +4403,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix) { __asm { @@ -4443,7 +4443,7 @@ void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, #ifdef HAS_ARGBBLENDROW_SSE2 // Blend 8 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -4577,7 +4577,7 @@ static const uvec8 kShuffleAlpha = { // pshufb xmm3, kShuffleAlpha // alpha // Blend 8 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -4725,7 +4725,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBATTENUATEROW_SSE2 // Attenuate 4 pixels at a time. // Aligned to 16 bytes. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4775,7 +4775,7 @@ static const uvec8 kShuffleAlpha1 = { 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4823,7 +4823,7 @@ static const ulvec8 kShuffleAlpha_AVX2 = { 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4862,7 +4862,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. // Aligned to 16 bytes. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { @@ -4918,7 +4918,7 @@ static const ulvec8 kUnattenShuffleAlpha_AVX2 = { // TODO(fbarchard): Enable USE_GATHER for future hardware if faster. // USE_GATHER is not on by default, due to being a slow instruction. #ifdef USE_GATHER -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { @@ -4953,7 +4953,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, } } #else // USE_GATHER -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { @@ -5021,7 +5021,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -5081,7 +5081,7 @@ static const vec8 kARGBToSepiaR = { }; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { __asm { mov eax, [esp + 4] /* dst_argb */ @@ -5139,7 +5139,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { // Same as Sepia except matrix is provided. // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, const int8* matrix_argb, int width) { __asm { @@ -5202,7 +5202,7 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). // Aligned to 16 bytes. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width) { __asm { @@ -5249,7 +5249,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. // Aligned to 16 bytes. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, uint32 value) { __asm { @@ -5284,7 +5284,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5324,7 +5324,7 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. // TODO(fbarchard): Port this to posix, neon and other math functions. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5373,7 +5373,7 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5403,7 +5403,7 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5441,7 +5441,7 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5471,7 +5471,7 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { __asm { @@ -5504,7 +5504,7 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, // -1 0 1 // -2 0 2 // -1 0 1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, uint8* dst_sobelx, int width) { __asm { @@ -5561,7 +5561,7 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, // -1 -2 -1 // 0 0 0 // 1 2 1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) { __asm { @@ -5615,7 +5615,7 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, // R = Sobel // G = Sobel // B = Sobel -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width) { __asm { @@ -5663,7 +5663,7 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_y, int width) { __asm { @@ -5697,7 +5697,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, // R = Sobel X // G = Sobel // B = Sobel Y -__declspec(naked) __declspec(align(16)) +__declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width) { __asm { @@ -5991,7 +5991,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. -__declspec(naked) __declspec(align(16)) +__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width) { @@ -6078,7 +6078,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -6179,7 +6179,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, #ifdef HAS_INTERPOLATEROW_SSSE3 // Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -6286,7 +6286,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #ifdef HAS_INTERPOLATEROW_SSE2 // Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -6398,7 +6398,7 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, #endif // HAS_INTERPOLATEROW_SSE2 // Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -6504,7 +6504,7 @@ void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #ifdef HAS_INTERPOLATEROW_SSE2 // Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -6615,7 +6615,7 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, } #endif // HAS_INTERPOLATEROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, uint8* dst_uv, int pix) { __asm { @@ -6640,7 +6640,7 @@ void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, } #ifdef HAS_HALFROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, uint8* dst_uv, int pix) { __asm { @@ -6667,7 +6667,7 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, } #endif // HAS_HALFROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { __asm { @@ -6694,7 +6694,7 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, } // Specialized ARGB to Bayer that just isolates G channel. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { __asm { @@ -6725,7 +6725,7 @@ void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { __asm { @@ -6751,7 +6751,7 @@ void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { __asm { @@ -6778,7 +6778,7 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, } #ifdef HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { __asm { @@ -6807,7 +6807,7 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { __asm { @@ -6933,7 +6933,7 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, // UYVY - Macro-pixel = 2 image pixels // U0Y0V0Y1 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -6971,7 +6971,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y, } } -__declspec(naked) __declspec(align(16)) +__declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -7010,7 +7010,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y, } #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb, uint8* dst_argb, const float* poly, int width) { @@ -7070,7 +7070,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb, #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb, uint8* dst_argb, const float* poly, int width) { @@ -7111,7 +7111,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb, #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { __asm { @@ -7146,7 +7146,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { __asm { push esi @@ -7178,7 +7178,7 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma, uint32 lumacoeff) { diff --git a/media/libyuv/source/scale_win.cc b/media/libyuv/source/scale_win.cc index 840b9738da..cfeaefc725 100644 --- a/media/libyuv/source/scale_win.cc +++ b/media/libyuv/source/scale_win.cc @@ -94,7 +94,7 @@ static uvec16 kScaleAb2 = // Reads 32 pixels, throws half away and writes 16 pixels. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -122,7 +122,7 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Blends 32x1 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -160,7 +160,7 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Blends 32x2 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -204,7 +204,7 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Reads 32 pixels, throws half away and writes 16 pixels. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -233,7 +233,7 @@ void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, // Blends 32x1 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -272,7 +272,7 @@ void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, // Blends 32x2 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -317,7 +317,7 @@ void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, // Point samples 32 pixels to 8 pixels. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -350,7 +350,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Blends 32x4 rectangle to 8x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -415,7 +415,7 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Note that movdqa+palign may be better than movdqu. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -464,7 +464,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, // Note that movdqa+palign may be better than movdqu. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -523,7 +523,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, // Note that movdqa+palign may be better than movdqu. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -586,7 +586,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, // 3/8 point sampler // Scale 32 pixels to 12 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { __asm { @@ -618,7 +618,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, } // Scale 16x3 pixels to 6x1 with interpolation -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -684,7 +684,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, } // Scale 16x2 pixels to 6x1 with interpolation -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -730,7 +730,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, // Reads 16xN bytes and produces 16 shorts at a time. // TODO(fbarchard): Make this handle 4xN bytes for any width ARGB. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint16* dst_ptr, int src_width, int src_height) { @@ -800,7 +800,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // when drmemory bug fixed. // https://code.google.com/p/drmemory/issues/detail?id=1396 -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { __asm { @@ -881,7 +881,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, // Reads 16 pixels, duplicates them and writes 32 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { __asm { @@ -908,7 +908,7 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { @@ -935,7 +935,7 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb, // Blends 8x1 rectangle to 4x1. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { @@ -965,7 +965,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, // Blends 8x2 rectangle to 4x1. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { @@ -1001,7 +1001,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, // Reads 4 pixels at a time. // Alignment requirement: dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, int src_stepx, uint8* dst_argb, int dst_width) { @@ -1039,7 +1039,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, // Blends four 2x2 to 4x1. // Alignment requirement: dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride, int src_stepx, @@ -1088,7 +1088,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, } // Column scaling unfiltered. SSE2 version. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) { __asm { @@ -1182,7 +1182,7 @@ static uvec8 kShuffleFractions = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) { __asm { @@ -1257,7 +1257,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) +__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) { __asm { @@ -1283,7 +1283,7 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, } // Divide num by div and return as 16.16 fixed point result. -__declspec(naked) __declspec(align(16)) +__declspec(naked) int FixedDiv_X86(int num, int div) { __asm { mov eax, [esp + 4] // num @@ -1296,7 +1296,7 @@ int FixedDiv_X86(int num, int div) { } // Divide num by div and return as 16.16 fixed point result. -__declspec(naked) __declspec(align(16)) +__declspec(naked) int FixedDiv1_X86(int num, int div) { __asm { mov eax, [esp + 4] // num diff --git a/netwerk/base/BackgroundFileSaver.cpp b/netwerk/base/BackgroundFileSaver.cpp index 9d18e13817..b48fb724b6 100644 --- a/netwerk/base/BackgroundFileSaver.cpp +++ b/netwerk/base/BackgroundFileSaver.cpp @@ -1198,6 +1198,10 @@ DigestOutputStream::DigestOutputStream(nsIOutputStream* aStream, DigestOutputStream::~DigestOutputStream() { + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return; + } shutdown(calledFromObject); } @@ -1252,5 +1256,7 @@ DigestOutputStream::IsNonBlocking(bool *retval) return mOutputStream->IsNonBlocking(retval); } +#undef LOG_ENABLED + } // namespace net } // namespace mozilla diff --git a/netwerk/base/nsSocketTransportService2.h b/netwerk/base/nsSocketTransportService2.h index 033ce1368b..35b2ac6aba 100644 --- a/netwerk/base/nsSocketTransportService2.h +++ b/netwerk/base/nsSocketTransportService2.h @@ -81,10 +81,7 @@ public: NS_DECL_NSITHREADOBSERVER NS_DECL_NSIRUNNABLE NS_DECL_NSIOBSERVER - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; nsSocketTransportService(); diff --git a/netwerk/base/nsStreamTransportService.h b/netwerk/base/nsStreamTransportService.h index 723bb3df9a..61fe8e55dc 100644 --- a/netwerk/base/nsStreamTransportService.h +++ b/netwerk/base/nsStreamTransportService.h @@ -21,10 +21,7 @@ public: NS_DECL_NSISTREAMTRANSPORTSERVICE NS_DECL_NSIEVENTTARGET NS_DECL_NSIOBSERVER - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; nsresult Init(); diff --git a/netwerk/base/nsURLHelper.cpp b/netwerk/base/nsURLHelper.cpp index 69598dbe7d..9871e05eca 100644 --- a/netwerk/base/nsURLHelper.cpp +++ b/netwerk/base/nsURLHelper.cpp @@ -436,7 +436,7 @@ net_ResolveRelativePath(const nsACString &relativePath, case '#': case '?': stop = true; - // fall through... + MOZ_FALLTHROUGH; case '/': // delimiter found if (name.EqualsLiteral("..")) { diff --git a/netwerk/cache/nsDiskCacheDevice.cpp b/netwerk/cache/nsDiskCacheDevice.cpp index cf77c65833..b224ed0097 100644 --- a/netwerk/cache/nsDiskCacheDevice.cpp +++ b/netwerk/cache/nsDiskCacheDevice.cpp @@ -303,17 +303,17 @@ nsDiskCache::Hash(const char * key, PLDHashNumber initval) /*------------------------------------- handle the last 11 bytes */ c += length; switch(len) { /* all the case statements fall through */ - case 11: c += (uint32_t(k[10])<<24); - case 10: c += (uint32_t(k[9])<<16); - case 9 : c += (uint32_t(k[8])<<8); + case 11: c += (uint32_t(k[10])<<24); MOZ_FALLTHROUGH; + case 10: c += (uint32_t(k[9])<<16); MOZ_FALLTHROUGH; + case 9 : c += (uint32_t(k[8])<<8); MOZ_FALLTHROUGH; /* the low-order byte of c is reserved for the length */ - case 8 : b += (uint32_t(k[7])<<24); - case 7 : b += (uint32_t(k[6])<<16); - case 6 : b += (uint32_t(k[5])<<8); - case 5 : b += k[4]; - case 4 : a += (uint32_t(k[3])<<24); - case 3 : a += (uint32_t(k[2])<<16); - case 2 : a += (uint32_t(k[1])<<8); + case 8 : b += (uint32_t(k[7])<<24); MOZ_FALLTHROUGH; + case 7 : b += (uint32_t(k[6])<<16); MOZ_FALLTHROUGH; + case 6 : b += (uint32_t(k[5])<<8); MOZ_FALLTHROUGH; + case 5 : b += k[4]; MOZ_FALLTHROUGH; + case 4 : a += (uint32_t(k[3])<<24); MOZ_FALLTHROUGH; + case 3 : a += (uint32_t(k[2])<<16); MOZ_FALLTHROUGH; + case 2 : a += (uint32_t(k[1])<<8); MOZ_FALLTHROUGH; case 1 : a += k[0]; /* case 0: nothing left to add */ } @@ -1145,4 +1145,3 @@ nsDiskCacheDevice::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) return usage; } - diff --git a/netwerk/cache2/CacheHashUtils.cpp b/netwerk/cache2/CacheHashUtils.cpp index 8e4eda56b5..4dc298bf0a 100644 --- a/netwerk/cache2/CacheHashUtils.cpp +++ b/netwerk/cache2/CacheHashUtils.cpp @@ -56,17 +56,17 @@ CacheHash::Hash(const char *aData, uint32_t aSize, uint32_t aInitval) /*------------------------------------- handle the last 11 bytes */ c += aSize; switch(len) { /* all the case statements fall through */ - case 11: c += (uint32_t(k[10])<<24); - case 10: c += (uint32_t(k[9])<<16); - case 9 : c += (uint32_t(k[8])<<8); + case 11: c += (uint32_t(k[10])<<24); MOZ_FALLTHROUGH; + case 10: c += (uint32_t(k[9])<<16); MOZ_FALLTHROUGH; + case 9 : c += (uint32_t(k[8])<<8); MOZ_FALLTHROUGH; /* the low-order byte of c is reserved for the length */ - case 8 : b += (uint32_t(k[7])<<24); - case 7 : b += (uint32_t(k[6])<<16); - case 6 : b += (uint32_t(k[5])<<8); - case 5 : b += k[4]; - case 4 : a += (uint32_t(k[3])<<24); - case 3 : a += (uint32_t(k[2])<<16); - case 2 : a += (uint32_t(k[1])<<8); + case 8 : b += (uint32_t(k[7])<<24); MOZ_FALLTHROUGH; + case 7 : b += (uint32_t(k[6])<<16); MOZ_FALLTHROUGH; + case 6 : b += (uint32_t(k[5])<<8); MOZ_FALLTHROUGH; + case 5 : b += k[4]; MOZ_FALLTHROUGH; + case 4 : a += (uint32_t(k[3])<<24); MOZ_FALLTHROUGH; + case 3 : a += (uint32_t(k[2])<<16); MOZ_FALLTHROUGH; + case 2 : a += (uint32_t(k[1])<<8); MOZ_FALLTHROUGH; case 1 : a += k[0]; /* case 0: nothing left to add */ } @@ -156,8 +156,8 @@ CacheHash::Update(const char *aData, uint32_t aLen) } switch (aLen) { - case 3: mBuf += data[2] << 16; - case 2: mBuf += data[1] << 8; + case 3: mBuf += data[2] << 16; MOZ_FALLTHROUGH; + case 2: mBuf += data[1] << 8; MOZ_FALLTHROUGH; case 1: mBuf += data[0]; } @@ -189,4 +189,3 @@ CacheHash::GetHash16() } // namespace net } // namespace mozilla - diff --git a/netwerk/cache2/CacheIndex.cpp b/netwerk/cache2/CacheIndex.cpp index d6da7030a7..bbfe94a6f7 100644 --- a/netwerk/cache2/CacheIndex.cpp +++ b/netwerk/cache2/CacheIndex.cpp @@ -127,7 +127,7 @@ private: if (!mDoNotSearchInUpdates) { entry = mIndex->mPendingUpdates.GetEntry(*mHash); } - // no break + MOZ_FALLTHROUGH; case CacheIndex::BUILDING: case CacheIndex::UPDATING: case CacheIndex::READY: @@ -430,7 +430,7 @@ CacheIndex::Shutdown() switch (oldState) { case WRITING: index->FinishWrite(false); - // no break + MOZ_FALLTHROUGH; case READY: if (index->mIndexOnDiskIsValid && !index->mDontMarkIndexClean) { if (!sanitize && NS_FAILED(index->WriteLogToDisk())) { @@ -1125,7 +1125,7 @@ CacheIndex::HasEntry(const SHA1Sum::Hash &hash, EntryStatus *_retval, bool *_pin case READING: case WRITING: entry = index->mPendingUpdates.GetEntry(hash); - // no break + MOZ_FALLTHROUGH; case BUILDING: case UPDATING: case READY: diff --git a/netwerk/cache2/CacheStorageService.cpp b/netwerk/cache2/CacheStorageService.cpp index 0cf532a9d9..a6cfa0e7f0 100644 --- a/netwerk/cache2/CacheStorageService.cpp +++ b/netwerk/cache2/CacheStorageService.cpp @@ -420,7 +420,7 @@ private: } mPass = ITERATE_METADATA; - // no break + MOZ_FALLTHROUGH; case ITERATE_METADATA: // Now grab the context iterator. diff --git a/netwerk/cookie/nsCookieService.cpp b/netwerk/cookie/nsCookieService.cpp index 1f124da304..8cb899a7b1 100644 --- a/netwerk/cookie/nsCookieService.cpp +++ b/netwerk/cookie/nsCookieService.cpp @@ -989,6 +989,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) NS_ENSURE_SUCCESS(rv, RESULT_RETRY); } // Fall through to the next upgrade. + MOZ_FALLTHROUGH; case 2: { @@ -1047,6 +1048,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) NS_ENSURE_SUCCESS(rv, RESULT_RETRY); } // Fall through to the next upgrade. + MOZ_FALLTHROUGH; case 3: { @@ -1143,6 +1145,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) NS_ENSURE_SUCCESS(rv, RESULT_RETRY); } // Fall through to the next upgrade. + MOZ_FALLTHROUGH; case 4: { @@ -1190,6 +1193,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) ("Upgraded database to schema version 5")); } // Fall through to the next upgrade. + MOZ_FALLTHROUGH; case 5: { @@ -1255,6 +1259,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) COOKIE_LOGSTRING(LogLevel::Debug, ("Upgraded database to schema version 6")); } + MOZ_FALLTHROUGH; case 6: { @@ -1314,6 +1319,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) // No more upgrades. Update the schema version. rv = mDefaultDBState->dbConn->SetSchemaVersion(COOKIES_SCHEMA_VERSION); NS_ENSURE_SUCCESS(rv, RESULT_RETRY); + MOZ_FALLTHROUGH; case COOKIES_SCHEMA_VERSION: break; @@ -1321,7 +1327,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) case 0: { NS_WARNING("couldn't get schema version!"); - + // the table may be usable; someone might've just clobbered the schema // version. we can treat this case like a downgrade using the codepath // below, by verifying the columns we care about are all there. for now, @@ -1331,6 +1337,7 @@ nsCookieService::TryInitDB(bool aRecreateDB) NS_ENSURE_SUCCESS(rv, RESULT_RETRY); } // fall through to downgrade check + MOZ_FALLTHROUGH; // downgrading. // if columns have been added to the table, we can still use the ones we diff --git a/netwerk/protocol/http/Http2Stream.cpp b/netwerk/protocol/http/Http2Stream.cpp index 66f69d5d13..29d244b3b4 100644 --- a/netwerk/protocol/http/Http2Stream.cpp +++ b/netwerk/protocol/http/Http2Stream.cpp @@ -1339,7 +1339,7 @@ Http2Stream::OnReadSegment(const char *buf, mRequestBodyLenRemaining -= dataLength; GenerateDataFrameHeader(dataLength, !mRequestBodyLenRemaining); ChangeState(SENDING_BODY); - // NO BREAK + MOZ_FALLTHROUGH; case SENDING_BODY: MOZ_ASSERT(mTxInlineFrameUsed, "OnReadSegment Send Data Header 0b"); @@ -1453,4 +1453,3 @@ Http2Stream::MapStreamToHttpConnection() } // namespace net } // namespace mozilla - diff --git a/netwerk/protocol/http/SpdyStream31.cpp b/netwerk/protocol/http/SpdyStream31.cpp index d5d40da54e..d80e61790d 100644 --- a/netwerk/protocol/http/SpdyStream31.cpp +++ b/netwerk/protocol/http/SpdyStream31.cpp @@ -1529,7 +1529,7 @@ SpdyStream31::OnReadSegment(const char *buf, mRequestBodyLenRemaining -= dataLength; GenerateDataFrameHeader(dataLength, !mRequestBodyLenRemaining); ChangeState(SENDING_REQUEST_BODY); - // NO BREAK + MOZ_FALLTHROUGH; case SENDING_REQUEST_BODY: MOZ_ASSERT(mTxInlineFrameUsed, "OnReadSegment Send Data Header 0b"); diff --git a/netwerk/protocol/http/nsHttpTransaction.cpp b/netwerk/protocol/http/nsHttpTransaction.cpp index c83818b3f1..0f4198e9b8 100644 --- a/netwerk/protocol/http/nsHttpTransaction.cpp +++ b/netwerk/protocol/http/nsHttpTransaction.cpp @@ -1629,7 +1629,8 @@ nsHttpTransaction::HandleContentStart() // check if this is a no-content response switch (mResponseHead->Status()) { case 101: - mPreserveStream = true; // fall through to other no content + mPreserveStream = true; + MOZ_FALLTHROUGH; // to other no content cases: case 204: case 205: case 304: diff --git a/netwerk/protocol/websocket/WebSocketChannel.cpp b/netwerk/protocol/websocket/WebSocketChannel.cpp index 2a9fe5347b..eb56608a82 100644 --- a/netwerk/protocol/websocket/WebSocketChannel.cpp +++ b/netwerk/protocol/websocket/WebSocketChannel.cpp @@ -2110,6 +2110,7 @@ WebSocketChannel::PrimeNewOutgoingMessage() msgType = kMsgTypeBinaryString; // no break: fall down into binary string case + MOZ_FALLTHROUGH; case kMsgTypeBinaryString: mOutHeader[0] = kFinalFragBit | nsIWebSocketFrame::OPCODE_BINARY; diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp index 6b0ee38f43..62d011166d 100644 --- a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp @@ -53,6 +53,7 @@ mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo, break; } // else fall through + MOZ_FALLTHROUGH; default: aStringToAppendTo += ch; } @@ -100,6 +101,7 @@ mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) break; } // else fall through + MOZ_FALLTHROUGH; default: i++; } @@ -511,7 +513,7 @@ mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const u { case '@': state[RFC2396E] = unchecked; - // no break here + MOZ_FALLTHROUGH; case '.': state[abbreviated] = unchecked; break; diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp index d3f92ad745..e4a9e1e6da 100644 --- a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp +++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp @@ -126,7 +126,6 @@ nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsISupports *aContext, status = NS_ERROR_NET_PARTIAL_TRANSFER; } if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) { - uint32_t waste; nsCOMPtr fpChannel = do_QueryInterface(request); bool isPending = false; if (request) { @@ -135,7 +134,9 @@ nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsISupports *aContext, if (fpChannel && !isPending) { fpChannel->ForcePending(true); } - status = BrotliHandler(nullptr, this, nullptr, 0, 0, &waste); + if (mBrotli->mTotalOut == 0 && !BrotliDecoderIsFinished(&mBrotli->mState)) { + status = NS_ERROR_INVALID_CONTENT_ENCODING; + } if (fpChannel && !isPending) { fpChannel->ForcePending(false); } @@ -149,6 +150,7 @@ NS_METHOD nsHTTPCompressConv::BrotliHandler(nsIInputStream *stream, void *closure, const char *dataIn, uint32_t, uint32_t aAvail, uint32_t *countRead) { + MOZ_ASSERT(stream); nsHTTPCompressConv *self = static_cast(closure); *countRead = 0; @@ -255,7 +257,7 @@ nsHTTPCompressConv::OnDataAvailable(nsIRequest* request, return NS_OK; } - // FALLTHROUGH + MOZ_FALLTHROUGH; case HTTP_COMPRESS_DEFLATE: diff --git a/release/docker/beet-mover/Dockerfile b/release/docker/beet-mover/Dockerfile new file mode 100644 index 0000000000..f109c0e247 --- /dev/null +++ b/release/docker/beet-mover/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:vivid + +RUN apt-get -q update \ + && apt-get install --yes -q \ + mercurial \ + python-dev \ + python-pip \ + python-virtualenv \ + libffi-dev \ + libssl-dev \ + libyaml-dev \ + libmysqlclient-dev \ + clamav \ + clamav-freshclam \ + curl \ + wget \ + && apt-get clean + +RUN freshclam --verbose diff --git a/security/manager/ssl/nsCertOverrideService.cpp b/security/manager/ssl/nsCertOverrideService.cpp index 80a6bb7881..f5dbb86c2d 100644 --- a/security/manager/ssl/nsCertOverrideService.cpp +++ b/security/manager/ssl/nsCertOverrideService.cpp @@ -396,19 +396,10 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName, if (NS_FAILED(rv)) return rv; - char *dbkey = nullptr; - rv = aCert->GetDbKey(&dbkey); - if (NS_FAILED(rv) || !dbkey) + nsAutoCString dbkey; + rv = aCert->GetDbKey(dbkey); + if (NS_FAILED(rv)) { return rv; - - // change \n and \r to spaces in the possibly multi-line-base64-encoded key - for (char *dbkey_walk = dbkey; - *dbkey_walk; - ++dbkey_walk) { - char c = *dbkey_walk; - if (c == '\r' || c == '\n') { - *dbkey_walk = ' '; - } } { @@ -419,11 +410,10 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName, aTemporary, mDottedOidForStoringNewHashes, fpStr, (nsCertOverride::OverrideBits)aOverrideBits, - nsDependentCString(dbkey)); + dbkey); Write(); } - PR_Free(dbkey); return NS_OK; } @@ -551,6 +541,8 @@ nsCertOverrideService::AddEntryToList(const nsACString &aHostName, int32_t aPort settings.mFingerprint = fingerprint; settings.mOverrideBits = ob; settings.mDBKey = dbKey; + // remove whitespace from stored dbKey for backwards compatibility + settings.mDBKey.StripWhitespace(); settings.mCert = aCert; } @@ -583,51 +575,14 @@ nsCertOverrideService::ClearValidityOverride(const nsACString & aHostName, int32 } static bool -matchesDBKey(nsIX509Cert *cert, const char *match_dbkey) +matchesDBKey(nsIX509Cert* cert, const nsCString& matchDbKey) { - char *dbkey = nullptr; - nsresult rv = cert->GetDbKey(&dbkey); - if (NS_FAILED(rv) || !dbkey) + nsAutoCString dbKey; + nsresult rv = cert->GetDbKey(dbKey); + if (NS_FAILED(rv)) { return false; - - bool found_mismatch = false; - const char *key1 = dbkey; - const char *key2 = match_dbkey; - - // skip over any whitespace when comparing - while (*key1 && *key2) { - char c1 = *key1; - char c2 = *key2; - - switch (c1) { - case ' ': - case '\t': - case '\n': - case '\r': - ++key1; - continue; - } - - switch (c2) { - case ' ': - case '\t': - case '\n': - case '\r': - ++key2; - continue; - } - - if (c1 != c2) { - found_mismatch = true; - break; - } - - ++key1; - ++key2; } - - PR_Free(dbkey); - return !found_mismatch; + return dbKey.Equals(matchDbKey); } NS_IMETHODIMP @@ -651,7 +606,7 @@ nsCertOverrideService::IsCertUsedForOverrides(nsIX509Cert *aCert, still_ok = false; } - if (still_ok && matchesDBKey(aCert, settings.mDBKey.get())) { + if (still_ok && matchesDBKey(aCert, settings.mDBKey)) { nsAutoCString cert_fingerprint; nsresult rv = NS_ERROR_UNEXPECTED; if (settings.mFingerprintAlgOID.Equals(mDottedOidForStoringNewHashes)) { @@ -681,7 +636,7 @@ nsCertOverrideService::EnumerateCertOverrides(nsIX509Cert *aCert, if (!aCert) { aEnumerator(settings, aUserData); } else { - if (matchesDBKey(aCert, settings.mDBKey.get())) { + if (matchesDBKey(aCert, settings.mDBKey)) { nsAutoCString cert_fingerprint; nsresult rv = NS_ERROR_UNEXPECTED; if (settings.mFingerprintAlgOID.Equals(mDottedOidForStoringNewHashes)) { diff --git a/security/manager/ssl/nsClientAuthRemember.cpp b/security/manager/ssl/nsClientAuthRemember.cpp index e955ae291e..9e165740ea 100644 --- a/security/manager/ssl/nsClientAuthRemember.cpp +++ b/security/manager/ssl/nsClientAuthRemember.cpp @@ -102,29 +102,26 @@ nsClientAuthRememberService::RememberDecision(const nsACString & aHostName, { // aClientCert == nullptr means: remember that user does not want to use a cert NS_ENSURE_ARG_POINTER(aServerCert); - if (aHostName.IsEmpty()) + if (aHostName.IsEmpty()) { return NS_ERROR_INVALID_ARG; + } nsAutoCString fpStr; nsresult rv = GetCertFingerprintByOidTag(aServerCert, SEC_OID_SHA256, fpStr); - if (NS_FAILED(rv)) + if (NS_FAILED(rv)) { return rv; + } { ReentrantMonitorAutoEnter lock(monitor); if (aClientCert) { RefPtr pipCert(new nsNSSCertificate(aClientCert)); - char *dbkey = nullptr; - rv = pipCert->GetDbKey(&dbkey); - if (NS_SUCCEEDED(rv) && dbkey) { - AddEntryToList(aHostName, fpStr, - nsDependentCString(dbkey)); + nsAutoCString dbkey; + rv = pipCert->GetDbKey(dbkey); + if (NS_SUCCEEDED(rv)) { + AddEntryToList(aHostName, fpStr, dbkey); } - if (dbkey) { - PORT_Free(dbkey); - } - } - else { + } else { nsCString empty; AddEntryToList(aHostName, fpStr, empty); } diff --git a/security/manager/ssl/nsCryptoHash.cpp b/security/manager/ssl/nsCryptoHash.cpp index de2a4e3ef0..b917a2d785 100644 --- a/security/manager/ssl/nsCryptoHash.cpp +++ b/security/manager/ssl/nsCryptoHash.cpp @@ -301,7 +301,7 @@ nsCryptoHMAC::Init(uint32_t aAlgorithm, nsIKeyObject *aKeyObject) PK11SymKey* key; // GetKeyObj doesn't addref the key - rv = aKeyObject->GetKeyObj((void**)&key); + rv = aKeyObject->GetKeyObj(&key); NS_ENSURE_SUCCESS(rv, rv); SECItem rawData; diff --git a/security/manager/ssl/nsIKeyModule.idl b/security/manager/ssl/nsIKeyModule.idl index 63a775140f..d9e1d11388 100644 --- a/security/manager/ssl/nsIKeyModule.idl +++ b/security/manager/ssl/nsIKeyModule.idl @@ -4,46 +4,34 @@ #include "nsISupports.idl" +%{ C++ + /* forward declaration */ + typedef struct PK11SymKeyStr PK11SymKey; +%} +[ptr] native PK11SymKeyPtr(PK11SymKey); + // An opaque key object. -[scriptable, uuid(4b31f4ed-9424-4710-b946-79b7e33cf3a8)] +[scriptable, uuid(ee2dc516-ba7b-4e77-89fe-c43b886ef715)] interface nsIKeyObject : nsISupports { // Key types const short SYM_KEY = 1; - const short PRIVATE_KEY = 2; - const short PUBLIC_KEY = 3; // Algorithm types - const short RC4 = 1; - const short AES_CBC = 2; const short HMAC = 257; - // aAlgorithm is an algorithm type - // aKey is either a PK11SymKey, SECKEYPublicKey, or a SECKEYPrivateKey. // The nsIKeyObject will take ownership of the key and be responsible // for freeing the key memory when destroyed. - [noscript] void initKey(in short aAlgorithm, in voidPtr aKey); + [noscript] void initKey(in short aAlgorithm, in PK11SymKeyPtr aKey); - // Return a pointer to the underlying key object - [noscript] voidPtr getKeyObj(); + // Returns a pointer to the underlying key object. + [noscript] PK11SymKeyPtr getKeyObj(); - // Will return NS_ERROR_NOT_INITIALIZED if initKey hasn't been run short getType(); }; -[scriptable, uuid(264eb54d-e20d-49a0-890c-1a5986ea81c4)] +[scriptable, uuid(838bdbf1-8244-448f-8bcd-cede70227d75)] interface nsIKeyObjectFactory : nsISupports { - nsIKeyObject lookupKeyByName(in ACString aName); - - nsIKeyObject unwrapKey(in short aAlgorithm, - [const, array, size_is(aWrappedKeyLen)] in octet aWrappedKey, - in unsigned long aWrappedKeyLen); - - // TODO: deriveKeyFrom* - - - // DO NOT USE - // This is not FIPS compliant and should not be used. nsIKeyObject keyFromString(in short aAlgorithm, in ACString aKey); }; diff --git a/security/manager/ssl/nsIX509Cert.idl b/security/manager/ssl/nsIX509Cert.idl index 9e9dce79ff..c643e6c88e 100644 --- a/security/manager/ssl/nsIX509Cert.idl +++ b/security/manager/ssl/nsIX509Cert.idl @@ -20,7 +20,7 @@ interface nsICertVerificationListener; /** * This represents a X.509 certificate. */ -[scriptable, uuid(f8ed8364-ced9-4c6e-86ba-48af53c393e6)] +[scriptable, uuid(bdc3979a-5422-4cd5-8589-696b6e96ea83)] interface nsIX509Cert : nsISupports { /** @@ -130,7 +130,7 @@ interface nsIX509Cert : nsISupports { /** * A unique identifier of this certificate within the local storage. */ - readonly attribute string dbKey; + readonly attribute ACString dbKey; /** * A human readable identifier to label this certificate. diff --git a/security/manager/ssl/nsKeyModule.cpp b/security/manager/ssl/nsKeyModule.cpp index 40cb36005e..f4e8c35ff0 100644 --- a/security/manager/ssl/nsKeyModule.cpp +++ b/security/manager/ssl/nsKeyModule.cpp @@ -2,11 +2,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "nsComponentManagerUtils.h" #include "nsCOMPtr.h" +#include "nsComponentManagerUtils.h" #include "nsKeyModule.h" #include "nsString.h" -#include "ScopedNSSTypes.h" using namespace mozilla; using namespace mozilla::psm; @@ -14,107 +13,80 @@ using namespace mozilla::psm; NS_IMPL_ISUPPORTS(nsKeyObject, nsIKeyObject) nsKeyObject::nsKeyObject() - : mKeyType(0), mSymKey(nullptr), mPrivateKey(nullptr), - mPublicKey(nullptr) + : mSymKey(nullptr) { } nsKeyObject::~nsKeyObject() { - CleanUp(); + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return; + } + destructorSafeDestroyNSSReference(); + shutdown(calledFromObject); } void -nsKeyObject::CleanUp() +nsKeyObject::virtualDestroyNSSReference() { - switch (mKeyType) { - case nsIKeyObject::SYM_KEY: - PK11_FreeSymKey(mSymKey); - break; - - case nsIKeyObject::PRIVATE_KEY: - PK11_DeleteTokenPrivateKey(mPrivateKey, true /* force */); - break; + destructorSafeDestroyNSSReference(); +} - case nsIKeyObject::PUBLIC_KEY: - PK11_DeleteTokenPublicKey(mPublicKey); - break; - - default: - // probably not initialized, do nothing - break; - } - mKeyType = 0; +void +nsKeyObject::destructorSafeDestroyNSSReference() +{ + mSymKey = nullptr; } ////////////////////////////////////////////////////////////////////////////// // nsIKeyObject NS_IMETHODIMP -nsKeyObject::InitKey(int16_t aAlgorithm, void * aKey) +nsKeyObject::InitKey(int16_t aAlgorithm, PK11SymKey* aKey) { - // Clear previous key data if it exists - CleanUp(); - - switch (aAlgorithm) { - case nsIKeyObject::RC4: - case nsIKeyObject::HMAC: - mSymKey = reinterpret_cast(aKey); - - if (!mSymKey) { - NS_ERROR("no symkey"); - break; - } - mKeyType = nsIKeyObject::SYM_KEY; - break; - - case nsIKeyObject::AES_CBC: - return NS_ERROR_NOT_IMPLEMENTED; - - default: - return NS_ERROR_INVALID_ARG; + if (!aKey || aAlgorithm != nsIKeyObject::HMAC) { + return NS_ERROR_INVALID_ARG; } - // One of these should have been created - if (!mSymKey && !mPrivateKey && !mPublicKey) - return NS_ERROR_FAILURE; + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return NS_ERROR_NOT_AVAILABLE; + } + mSymKey = aKey; return NS_OK; } NS_IMETHODIMP -nsKeyObject::GetKeyObj(void * *_retval) +nsKeyObject::GetKeyObj(PK11SymKey** _retval) { - if (mKeyType == 0) - return NS_ERROR_NOT_INITIALIZED; - - switch (mKeyType) { - case nsIKeyObject::SYM_KEY: - *_retval = (void*)mSymKey; - break; - - case nsIKeyObject::PRIVATE_KEY: - *_retval = (void*)mPublicKey; - break; - - case nsIKeyObject::PUBLIC_KEY: - *_retval = (void*)mPrivateKey; - break; - - default: - // unknown key type? How did that happen? - return NS_ERROR_FAILURE; + if (!_retval) { + return NS_ERROR_INVALID_ARG; } + + *_retval = nullptr; + + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return NS_ERROR_NOT_AVAILABLE; + } + + if (!mSymKey) { + return NS_ERROR_NOT_INITIALIZED; + } + + *_retval = mSymKey; return NS_OK; } NS_IMETHODIMP nsKeyObject::GetType(int16_t *_retval) { - if (mKeyType == 0) - return NS_ERROR_NOT_INITIALIZED; - - *_retval = mKeyType; + if (!_retval) { + return NS_ERROR_INVALID_ARG; + } + *_retval = nsIKeyObject::SYM_KEY; return NS_OK; } @@ -127,46 +99,37 @@ nsKeyObjectFactory::nsKeyObjectFactory() { } -NS_IMETHODIMP -nsKeyObjectFactory::LookupKeyByName(const nsACString & aName, - nsIKeyObject **_retval) +nsKeyObjectFactory::~nsKeyObjectFactory() { - return NS_ERROR_NOT_IMPLEMENTED; -} - -NS_IMETHODIMP -nsKeyObjectFactory::UnwrapKey(int16_t aAlgorithm, const uint8_t *aWrappedKey, - uint32_t aWrappedKeyLen, nsIKeyObject **_retval) -{ - return NS_ERROR_NOT_IMPLEMENTED; + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return; + } + shutdown(calledFromObject); } NS_IMETHODIMP -nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString & aKey, - nsIKeyObject **_retval) +nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString& aKey, + nsIKeyObject** _retval) { - CK_MECHANISM_TYPE cipherMech; - CK_ATTRIBUTE_TYPE cipherOperation; - switch (aAlgorithm) - { - case nsIKeyObject::HMAC: - cipherMech = CKM_GENERIC_SECRET_KEY_GEN; - cipherOperation = CKA_SIGN; - break; - - case nsIKeyObject::RC4: - cipherMech = CKM_RC4; - cipherOperation = CKA_ENCRYPT; - break; - - default: + if (!_retval || aAlgorithm != nsIKeyObject::HMAC) { return NS_ERROR_INVALID_ARG; } + nsNSSShutDownPreventionLock locker; + if (isAlreadyShutDown()) { + return NS_ERROR_NOT_AVAILABLE; + } + + CK_MECHANISM_TYPE cipherMech = CKM_GENERIC_SECRET_KEY_GEN; + CK_ATTRIBUTE_TYPE cipherOperation = CKA_SIGN; + nsresult rv; - nsCOMPtr key = - do_CreateInstance(NS_KEYMODULEOBJECT_CONTRACTID, &rv); - NS_ENSURE_SUCCESS(rv, rv); + nsCOMPtr key( + do_CreateInstance(NS_KEYMODULEOBJECT_CONTRACTID, &rv)); + if (NS_FAILED(rv)) { + return rv; + } // Convert the raw string into a SECItem const nsCString& flatKey = PromiseFlatCString(aKey); @@ -176,18 +139,20 @@ nsKeyObjectFactory::KeyFromString(int16_t aAlgorithm, const nsACString & aKey, ScopedPK11SlotInfo slot(PK11_GetBestSlot(cipherMech, nullptr)); if (!slot) { - NS_ERROR("no slot"); return NS_ERROR_FAILURE; } - PK11SymKey* symKey = PK11_ImportSymKey(slot, cipherMech, PK11_OriginUnwrap, - cipherOperation, &keyItem, nullptr); + ScopedPK11SymKey symKey(PK11_ImportSymKey(slot, cipherMech, + PK11_OriginUnwrap, cipherOperation, + &keyItem, nullptr)); if (!symKey) { return NS_ERROR_FAILURE; } - - rv = key->InitKey(aAlgorithm, (void*)symKey); - NS_ENSURE_SUCCESS(rv, rv); + + rv = key->InitKey(aAlgorithm, symKey.forget()); + if (NS_FAILED(rv)) { + return rv; + } key.swap(*_retval); return NS_OK; diff --git a/security/manager/ssl/nsKeyModule.h b/security/manager/ssl/nsKeyModule.h index b1e6c2d38a..7c844be073 100644 --- a/security/manager/ssl/nsKeyModule.h +++ b/security/manager/ssl/nsKeyModule.h @@ -2,25 +2,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef _NS_KEYMODULE_H_ -#define _NS_KEYMODULE_H_ +#ifndef nsKeyModule_h +#define nsKeyModule_h +#include "ScopedNSSTypes.h" #include "nsIKeyModule.h" +#include "nsNSSShutDown.h" #include "pk11pub.h" -#include "mozilla/Attributes.h" -/* eae599aa-ecef-49c6-a8af-6ddcc6feb484 */ #define NS_KEYMODULEOBJECT_CID \ -{ 0xeae599aa, 0xecef, 0x49c6, {0xa8, 0xaf, 0x6d, 0xdc, 0xc6, 0xfe, 0xb4, 0x84} } +{ 0x9d383ddd, 0x6856, 0x4187, {0x84, 0x85, 0xf3, 0x61, 0x95, 0xb2, 0x9a, 0x0e} } #define NS_KEYMODULEOBJECT_CONTRACTID "@mozilla.org/security/keyobject;1" -/* a39e0e9d-e567-41e3-b12c-5df67f18174d */ #define NS_KEYMODULEOBJECTFACTORY_CID \ -{ 0xa39e0e9d, 0xe567, 0x41e3, {0xb1, 0x2c, 0x5d, 0xf6, 0x7f, 0x18, 0x17, 0x4d} } +{ 0x2a35dd47, 0xb026, 0x4e8d, {0xb6, 0xb7, 0x57, 0x40, 0xf6, 0x1a, 0xb9, 0x02} } #define NS_KEYMODULEOBJECTFACTORY_CONTRACTID \ "@mozilla.org/security/keyobjectfactory;1" class nsKeyObject final : public nsIKeyObject + , public nsNSSShutDownObject { public: nsKeyObject(); @@ -30,24 +30,19 @@ public: private: ~nsKeyObject(); - + // Disallow copy constructor nsKeyObject(nsKeyObject&); - // 0 if not yet set, otherwise one of the nsIKeyObject::*KEY values - uint32_t mKeyType; - - // A union of our possible key types - PK11SymKey* mSymKey; - SECKEYPrivateKey* mPrivateKey; - SECKEYPublicKey* mPublicKey; + ScopedPK11SymKey mSymKey; - // Helper method to free memory used by keys. - void CleanUp(); + virtual void virtualDestroyNSSReference() override; + void destructorSafeDestroyNSSReference(); }; class nsKeyObjectFactory final : public nsIKeyObjectFactory + , public nsNSSShutDownObject { public: nsKeyObjectFactory(); @@ -56,10 +51,13 @@ public: NS_DECL_NSIKEYOBJECTFACTORY private: - ~nsKeyObjectFactory() {} + ~nsKeyObjectFactory(); // Disallow copy constructor nsKeyObjectFactory(nsKeyObjectFactory&); + + // No NSS resources to release. + virtual void virtualDestroyNSSReference() override {} }; -#endif // _NS_KEYMODULE_H_ +#endif // nsKeyModule_h diff --git a/security/manager/ssl/nsNSSCertificate.cpp b/security/manager/ssl/nsNSSCertificate.cpp index 76a9836f75..17daac89ab 100644 --- a/security/manager/ssl/nsNSSCertificate.cpp +++ b/security/manager/ssl/nsNSSCertificate.cpp @@ -50,6 +50,10 @@ #include "ssl.h" #include "plbase64.h" +#ifdef XP_WIN +#include // for htonl +#endif + using namespace mozilla; using namespace mozilla::psm; @@ -486,32 +490,37 @@ nsNSSCertificate::FormatUIStrings(const nsAutoString& nickname, } NS_IMETHODIMP -nsNSSCertificate::GetDbKey(char** aDbKey) +nsNSSCertificate::GetDbKey(nsACString& aDbKey) { nsNSSShutDownPreventionLock locker; - if (isAlreadyShutDown()) + if (isAlreadyShutDown()) { return NS_ERROR_NOT_AVAILABLE; + } - SECItem key; + static_assert(sizeof(uint64_t) == 8, "type size sanity check"); + static_assert(sizeof(uint32_t) == 4, "type size sanity check"); + // The format of the key is the base64 encoding of the following: + // 4 bytes: {0, 0, 0, 0} (this was intended to be the module ID, but it was + // never implemented) + // 4 bytes: {0, 0, 0, 0} (this was intended to be the slot ID, but it was + // never implemented) + // 4 bytes: + // 4 bytes: + // n bytes: + // m bytes: + nsAutoCString buf; + const char leadingZeroes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + buf.Append(leadingZeroes, sizeof(leadingZeroes)); + uint32_t serialNumberLen = htonl(mCert->serialNumber.len); + buf.Append(reinterpret_cast(&serialNumberLen), sizeof(uint32_t)); + uint32_t issuerLen = htonl(mCert->derIssuer.len); + buf.Append(reinterpret_cast(&issuerLen), sizeof(uint32_t)); + buf.Append(reinterpret_cast(mCert->serialNumber.data), + mCert->serialNumber.len); + buf.Append(reinterpret_cast(mCert->derIssuer.data), + mCert->derIssuer.len); - NS_ENSURE_ARG(aDbKey); - *aDbKey = nullptr; - key.len = NS_NSS_LONG*4+mCert->serialNumber.len+mCert->derIssuer.len; - key.data = (unsigned char*) moz_xmalloc(key.len); - if (!key.data) - return NS_ERROR_OUT_OF_MEMORY; - NS_NSS_PUT_LONG(0,key.data); // later put moduleID - NS_NSS_PUT_LONG(0,&key.data[NS_NSS_LONG]); // later put slotID - NS_NSS_PUT_LONG(mCert->serialNumber.len,&key.data[NS_NSS_LONG*2]); - NS_NSS_PUT_LONG(mCert->derIssuer.len,&key.data[NS_NSS_LONG*3]); - memcpy(&key.data[NS_NSS_LONG*4], mCert->serialNumber.data, - mCert->serialNumber.len); - memcpy(&key.data[NS_NSS_LONG*4+mCert->serialNumber.len], - mCert->derIssuer.data, mCert->derIssuer.len); - - *aDbKey = NSSBase64_EncodeItem(nullptr, nullptr, 0, &key); - free(key.data); // SECItem is a 'c' type without a destructor - return (*aDbKey) ? NS_OK : NS_ERROR_FAILURE; + return Base64Encode(buf, aDbKey); } NS_IMETHODIMP diff --git a/security/manager/ssl/nsNSSCertificate.h b/security/manager/ssl/nsNSSCertificate.h index ba8f29e302..621df9a3fb 100644 --- a/security/manager/ssl/nsNSSCertificate.h +++ b/security/manager/ssl/nsNSSCertificate.h @@ -132,17 +132,6 @@ private: void operator=(const nsNSSCertListEnumerator&) = delete; }; - -#define NS_NSS_LONG 4 -#define NS_NSS_GET_LONG(x) ((((unsigned long)((x)[0])) << 24) | \ - (((unsigned long)((x)[1])) << 16) | \ - (((unsigned long)((x)[2])) << 8) | \ - ((unsigned long)((x)[3])) ) -#define NS_NSS_PUT_LONG(src,dest) (dest)[0] = (((src) >> 24) & 0xff); \ - (dest)[1] = (((src) >> 16) & 0xff); \ - (dest)[2] = (((src) >> 8) & 0xff); \ - (dest)[3] = ((src) & 0xff); - #define NS_X509CERT_CID { /* 660a3226-915c-4ffb-bb20-8985a632df05 */ \ 0x660a3226, \ 0x915c, \ diff --git a/security/manager/ssl/nsNSSCertificateDB.cpp b/security/manager/ssl/nsNSSCertificateDB.cpp index 4234f59791..afab571cde 100644 --- a/security/manager/ssl/nsNSSCertificateDB.cpp +++ b/security/manager/ssl/nsNSSCertificateDB.cpp @@ -48,6 +48,10 @@ #include "ssl.h" #include "plbase64.h" +#ifdef XP_WIN +#include // for ntohl +#endif + using namespace mozilla; using namespace mozilla::psm; using mozilla::psm::SharedSSLState; @@ -137,42 +141,58 @@ nsNSSCertificateDB::FindCertByDBKey(const char *aDBkey, nsISupports *aToken, return NS_ERROR_NOT_AVAILABLE; } - SECItem keyItem = {siBuffer, nullptr, 0}; - SECItem *dummy; + static_assert(sizeof(uint64_t) == 8, "type size sanity check"); + static_assert(sizeof(uint32_t) == 4, "type size sanity check"); + // (From nsNSSCertificate::GetDbKey) + // The format of the key is the base64 encoding of the following: + // 4 bytes: {0, 0, 0, 0} (this was intended to be the module ID, but it was + // never implemented) + // 4 bytes: {0, 0, 0, 0} (this was intended to be the slot ID, but it was + // never implemented) + // 4 bytes: + // 4 bytes: + // n bytes: + // m bytes: + nsAutoCString decoded; + nsAutoCString tmpDBKey(aDBkey); + // Filter out any whitespace for backwards compatibility. + tmpDBKey.StripWhitespace(); + nsresult rv = Base64Decode(tmpDBKey, decoded); + if (NS_FAILED(rv)) { + return rv; + } + if (decoded.Length() < 16) { + return NS_ERROR_ILLEGAL_INPUT; + } + const char* reader = decoded.BeginReading(); + uint64_t zeroes = *reinterpret_cast(reader); + if (zeroes != 0) { + return NS_ERROR_ILLEGAL_INPUT; + } + reader += sizeof(uint64_t); + uint32_t serialNumberLen = ntohl(*reinterpret_cast(reader)); + reader += sizeof(uint32_t); + uint32_t issuerLen = ntohl(*reinterpret_cast(reader)); + reader += sizeof(uint32_t); + if (decoded.Length() != 16ULL + serialNumberLen + issuerLen) { + return NS_ERROR_ILLEGAL_INPUT; + } CERTIssuerAndSN issuerSN; - //unsigned long moduleID,slotID; + issuerSN.serialNumber.len = serialNumberLen; + issuerSN.serialNumber.data = (unsigned char*)reader; + reader += serialNumberLen; + issuerSN.derIssuer.len = issuerLen; + issuerSN.derIssuer.data = (unsigned char*)reader; + reader += issuerLen; + MOZ_ASSERT(reader == decoded.EndReading()); - dummy = NSSBase64_DecodeBuffer(nullptr, &keyItem, aDBkey, - (uint32_t)strlen(aDBkey)); - if (!dummy || keyItem.len < NS_NSS_LONG*4) { - PR_FREEIF(keyItem.data); - return NS_ERROR_INVALID_ARG; - } - - ScopedCERTCertificate cert; - // someday maybe we can speed up the search using the moduleID and slotID - // moduleID = NS_NSS_GET_LONG(keyItem.data); - // slotID = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG]); - - // build the issuer/SN structure - issuerSN.serialNumber.len = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG*2]); - issuerSN.derIssuer.len = NS_NSS_GET_LONG(&keyItem.data[NS_NSS_LONG*3]); - if (issuerSN.serialNumber.len == 0 || issuerSN.derIssuer.len == 0 - || issuerSN.serialNumber.len + issuerSN.derIssuer.len - != keyItem.len - NS_NSS_LONG*4) { - PR_FREEIF(keyItem.data); - return NS_ERROR_INVALID_ARG; - } - issuerSN.serialNumber.data= &keyItem.data[NS_NSS_LONG*4]; - issuerSN.derIssuer.data= &keyItem.data[NS_NSS_LONG*4+ - issuerSN.serialNumber.len]; - - cert = CERT_FindCertByIssuerAndSN(CERT_GetDefaultCertDB(), &issuerSN); - PR_FREEIF(keyItem.data); + ScopedCERTCertificate cert( + CERT_FindCertByIssuerAndSN(CERT_GetDefaultCertDB(), &issuerSN)); if (cert) { nsCOMPtr nssCert = nsNSSCertificate::Create(cert.get()); - if (!nssCert) + if (!nssCert) { return NS_ERROR_OUT_OF_MEMORY; + } nssCert.forget(_cert); } return NS_OK; @@ -1221,12 +1241,10 @@ nsNSSCertificateDB::getCertNames(CERTCertList *certList, node = CERT_LIST_NEXT(node)) { if (getCertType(node->cert) == type) { RefPtr pipCert(new nsNSSCertificate(node->cert)); - char *dbkey = nullptr; - char *namestr = nullptr; - nsAutoString certstr; - pipCert->GetDbKey(&dbkey); + nsAutoCString dbkey; + pipCert->GetDbKey(dbkey); nsAutoString keystr = NS_ConvertASCIItoUTF16(dbkey); - PR_FREEIF(dbkey); + char *namestr = nullptr; if (type == nsIX509Cert::EMAIL_CERT) { namestr = node->cert->emailAddr; } else { @@ -1237,6 +1255,7 @@ nsNSSCertificateDB::getCertNames(CERTCertList *certList, } } nsAutoString certname = NS_ConvertASCIItoUTF16(namestr ? namestr : ""); + nsAutoString certstr; certstr.Append(char16_t(DELIM)); certstr += certname; certstr.Append(char16_t(DELIM)); diff --git a/security/manager/ssl/nsNSSCertificateFakeTransport.cpp b/security/manager/ssl/nsNSSCertificateFakeTransport.cpp index 3b058202dc..766f98933f 100644 --- a/security/manager/ssl/nsNSSCertificateFakeTransport.cpp +++ b/security/manager/ssl/nsNSSCertificateFakeTransport.cpp @@ -31,7 +31,7 @@ nsNSSCertificateFakeTransport::~nsNSSCertificateFakeTransport() } NS_IMETHODIMP -nsNSSCertificateFakeTransport::GetDbKey(char**) +nsNSSCertificateFakeTransport::GetDbKey(nsACString&) { NS_NOTREACHED("Unimplemented on content process"); return NS_ERROR_NOT_IMPLEMENTED; diff --git a/security/manager/ssl/nsNSSShutDown.h b/security/manager/ssl/nsNSSShutDown.h index 49a3498722..fa6a0add12 100644 --- a/security/manager/ssl/nsNSSShutDown.h +++ b/security/manager/ssl/nsNSSShutDown.h @@ -129,6 +129,22 @@ protected: the tracking list, to ensure no additional attempt to free the resources will be made. + ---------------------------------------------------------------------------- + IMPORTANT NOTE REGARDING CLASSES THAT IMPLEMENT nsNSSShutDownObject BUT DO + NOT DIRECTLY HOLD NSS RESOURCES: + ---------------------------------------------------------------------------- + Currently, classes that do not hold NSS resources but do call NSS functions + inherit from nsNSSShutDownObject (and use the lock/isAlreadyShutDown + mechanism) as a way of ensuring it is safe to call those functions. Because + these classes do not hold any resources, however, it is tempting to skip the + destructor component of this interface. This MUST NOT be done, because + if an object of such a class is destructed before the nsNSSShutDownList + processes all of its entries, this essentially causes a use-after-free when + nsNSSShutDownList reaches the entry that has been destroyed. The safe way to + do this is to implement the destructor as usual but omit the call to + destructorSafeDestroyNSSReference() as it is unnecessary and probably isn't + defined for that class. + destructorSafeDestroyNSSReference() does not need to acquire an nsNSSShutDownPreventionLock or check isAlreadyShutDown() as long as it is only called by the destructor that has already acquired the lock and diff --git a/security/manager/ssl/tests/unit/test_cert_dbKey.js b/security/manager/ssl/tests/unit/test_cert_dbKey.js new file mode 100644 index 0000000000..742264e769 --- /dev/null +++ b/security/manager/ssl/tests/unit/test_cert_dbKey.js @@ -0,0 +1,152 @@ +// -*- Mode: javascript; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"use strict"; + +// This test tests that the nsIX509Cert.dbKey and nsIX509CertDB.findCertByDBKey +// APIs work as expected. That is, getting a certificate's dbKey and using it +// in findCertByDBKey should return the same certificate. Also, for backwards +// compatibility, findCertByDBKey should ignore any whitespace in its input +// (even though now nsIX509Cert.dbKey will never have whitespace in it). + +function hexStringToBytes(hex) { + let bytes = []; + for (let hexByteStr of hex.split(":")) { + bytes.push(parseInt(hexByteStr, 16)); + } + return bytes; +} + +function encodeCommonNameAsBytes(commonName) { + // The encoding will look something like this (in hex): + // 30 (SEQUENCE) + // 31 (SET) + // 30 (SEQUENCE) + // 06 (OID) 03 (length) + // 55 04 03 (id-at-commonName) + // 0C (UTF8String) + // + // To make things simple, it would be nice to have the length of each + // component be less than 128 bytes (so we can have single-byte lengths). + // For this to hold, the maximum length of the contents of the outermost + // SEQUENCE must be 127. Everything not in the contents of the common name + // will take up 11 bytes, so the value of the common name itself can be at + // most 116 bytes. + ok(commonName.length <= 116, + "test assumption: common name can't be longer than 116 bytes (makes " + + "DER encoding easier)"); + let commonNameOIDBytes = [ 0x06, 0x03, 0x55, 0x04, 0x03 ]; + let commonNameBytes = [ 0x0C, commonName.length ]; + for (let i = 0; i < commonName.length; i++) { + commonNameBytes.push(commonName.charCodeAt(i)); + } + let bytes = commonNameOIDBytes.concat(commonNameBytes); + bytes.unshift(bytes.length); + bytes.unshift(0x30); // SEQUENCE + bytes.unshift(bytes.length); + bytes.unshift(0x31); // SET + bytes.unshift(bytes.length); + bytes.unshift(0x30); // SEQUENCE + return bytes; +} + +function testInvalidDBKey(certDB, dbKey) { + let exceptionCaught = false; + try { + let cert = certDB.findCertByDBKey(dbKey, null); + } catch(e) { + do_print(e); + exceptionCaught = true; + } + ok(exceptionCaught, "should have thrown and caught an exception"); +} + +function testDBKeyForNonexistentCert(certDB, dbKey) { + let cert = certDB.findCertByDBKey(dbKey, null); + ok(!cert, "shouldn't find cert for given dbKey"); +} + +function byteArrayToByteString(bytes) { + let byteString = ""; + for (let b of bytes) { + byteString += String.fromCharCode(b); + } + return byteString; +} + +function run_test() { + do_get_profile(); + let certDB = Cc["@mozilla.org/security/x509certdb;1"] + .getService(Ci.nsIX509CertDB); + let cert = constructCertFromFile("bad_certs/test-ca.pem"); + equal(cert.issuerName, "CN=" + cert.issuerCommonName, + "test assumption: this certificate's issuer distinguished name " + + "consists only of a common name"); + let issuerBytes = encodeCommonNameAsBytes(cert.issuerCommonName); + ok(issuerBytes.length < 256, + "test assumption: length of encoded issuer is less than 256 bytes"); + let serialNumberBytes = hexStringToBytes(cert.serialNumber); + ok(serialNumberBytes.length < 256, + "test assumption: length of encoded serial number is less than 256 bytes"); + let dbKeyHeader = [ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, serialNumberBytes.length, + 0, 0, 0, issuerBytes.length ]; + let expectedDbKeyBytes = dbKeyHeader.concat(serialNumberBytes, issuerBytes); + let expectedDbKey = btoa(byteArrayToByteString(expectedDbKeyBytes)); + equal(cert.dbKey, expectedDbKey, + "actual and expected dbKey values should match"); + + let certFromDbKey = certDB.findCertByDBKey(expectedDbKey, null); + ok(certFromDbKey.equals(cert), + "nsIX509CertDB.findCertByDBKey should find the right certificate"); + + ok(expectedDbKey.length > 64, + "test assumption: dbKey should be longer than 64 characters"); + let expectedDbKeyWithCRLF = expectedDbKey.replace(/(.{64})/, "$1\r\n"); + ok(expectedDbKeyWithCRLF.indexOf("\r\n") == 64, + "test self-check: adding CRLF to dbKey should succeed"); + certFromDbKey = certDB.findCertByDBKey(expectedDbKeyWithCRLF, null); + ok(certFromDbKey.equals(cert), + "nsIX509CertDB.findCertByDBKey should work with dbKey with CRLF"); + + let expectedDbKeyWithSpaces = expectedDbKey.replace(/(.{64})/, "$1 "); + ok(expectedDbKeyWithSpaces.indexOf(" ") == 64, + "test self-check: adding spaces to dbKey should succeed"); + certFromDbKey = certDB.findCertByDBKey(expectedDbKeyWithSpaces, null); + ok(certFromDbKey.equals(cert), + "nsIX509CertDB.findCertByDBKey should work with dbKey with spaces"); + + // Test some invalid dbKey values. + testInvalidDBKey(certDB, "AAAA"); // Not long enough. + // No header. + testInvalidDBKey(certDB, btoa(byteArrayToByteString( + [ 0, 0, 0, serialNumberBytes.length, + 0, 0, 0, issuerBytes.length ].concat(serialNumberBytes, issuerBytes)))); + testInvalidDBKey(certDB, btoa(byteArrayToByteString( + [ 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, // serial number length is way too long + 255, 255, 255, 255, // issuer length is way too long + 0, 0, 0, 0 ]))); + // Truncated issuer. + testInvalidDBKey(certDB, btoa(byteArrayToByteString( + [ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, + 0, 0, 0, 10, + 1, + 1, 2, 3 ]))); + // Issuer doesn't decode to valid common name. + testDBKeyForNonexistentCert(certDB, btoa(byteArrayToByteString( + [ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, + 0, 0, 0, 3, + 1, + 1, 2, 3 ]))); + + // zero-length serial number and issuer -> no such certificate + testDBKeyForNonexistentCert(certDB, btoa(byteArrayToByteString( + [ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 ]))); +} diff --git a/security/manager/ssl/tests/unit/test_weak_crypto.js b/security/manager/ssl/tests/unit/test_weak_crypto.js index d93fe7c621..ab3bfaca18 100644 --- a/security/manager/ssl/tests/unit/test_weak_crypto.js +++ b/security/manager/ssl/tests/unit/test_weak_crypto.js @@ -103,8 +103,6 @@ function storeCertOverride(port, cert) { } function startClient(port, expectedResult, options = {}) { - let SSL_ERROR_BASE = Ci.nsINSSErrorsService.NSS_SSL_ERROR_BASE; - let SSL_ERROR_BAD_CERT_ALERT = SSL_ERROR_BASE + 17; let transport = socketTransportService.createTransport(["ssl"], 1, "127.0.0.1", port, null); if (options.isPrivate) { @@ -113,8 +111,7 @@ function startClient(port, expectedResult, options = {}) { let input; let output; - let inputDeferred = Promise.defer(); - let outputDeferred = Promise.defer(); + let deferred = Promise.defer(); let handler = { @@ -125,46 +122,48 @@ function startClient(port, expectedResult, options = {}) { }, onInputStreamReady: function(input) { - let errorCode = Cr.NS_OK; try { let data = NetUtil.readInputStreamToString(input, input.available()); equal(data, "HELLO", "Echoed data received"); input.close(); output.close(); + deferred.resolve(); } catch (e) { - errorCode = e.result; - } - try { - equal(errorCode, expectedResult, - "Actual and expected connection result should match"); - inputDeferred.resolve(); - } catch (e) { - inputDeferred.reject(e); + deferred.reject(e); } }, onOutputStreamReady: function(output) { try { - output.write("HELLO", 5); + try { + output.write("HELLO", 5); + } catch (e) { + if (e.result == Cr.NS_BASE_STREAM_WOULD_BLOCK) { + output.asyncWait(handler, 0, 0, Services.tm.currentThread); + return; + } + equal(e.result, expectedResult, + "Actual and expected connection result should match"); + output.close(); + deferred.resolve(); + return; + } + equal(Cr.NS_OK, expectedResult, "Connection should succeed"); do_print("Output to server written"); - outputDeferred.resolve(); input = transport.openInputStream(0, 0, 0); input.asyncWait(handler, 0, 0, Services.tm.currentThread); } catch (e) { - let errorCode = -1 * (e.result & 0xFFFF); - if (errorCode == SSL_ERROR_BAD_CERT_ALERT) { - do_print("Server doesn't like client cert"); - } - outputDeferred.reject(e); + deferred.reject(e); } } }; transport.setEventSink(handler, Services.tm.currentThread); - output = transport.openOutputStream(0, 0, 0); + output = transport.openOutputStream(Ci.nsITransport.OPEN_UNBUFFERED, 0, 0); + output.QueryInterface(Ci.nsIAsyncOutputStream); - return Promise.all([inputDeferred.promise, outputDeferred.promise]); + return deferred.promise; } function run_test() { diff --git a/security/manager/ssl/tests/unit/xpcshell.ini b/security/manager/ssl/tests/unit/xpcshell.ini index 2771027048..66e953226f 100644 --- a/security/manager/ssl/tests/unit/xpcshell.ini +++ b/security/manager/ssl/tests/unit/xpcshell.ini @@ -54,6 +54,7 @@ skip-if = toolkit == 'android' || toolkit == 'gonk' [test_pinning_dynamic.js] [test_pinning_header_parsing.js] +[test_cert_dbKey.js] [test_cert_keyUsage.js] [test_logoutAndTeardown.js] run-sequentially = hardcoded ports diff --git a/testing/mozharness/configs/beetmover/en_us.yml.tmpl b/testing/mozharness/configs/beetmover/en_us.yml.tmpl new file mode 100644 index 0000000000..7a372d8ae5 --- /dev/null +++ b/testing/mozharness/configs/beetmover/en_us.yml.tmpl @@ -0,0 +1,84 @@ +--- +metadata: + name: "Beet Mover Manifest" + description: "Maps artifact locations to s3 key names for the en-US locale" + owner: "release@mozilla.com" + +mapping: +{% for locale in locales %} + # common deliverables + {{ locale }}: + complete_mar: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.complete.mar + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ version }}.complete.mar + checksum: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.checksums + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.checksums + checksum_sig: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.checksums.asc + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.checksums.asc + buildinfo: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.json + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.json + mozinfo: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.mozinfo.json + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.mozinfo.json + socorroinfo: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.txt + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.txt + + {% if platform == "win32" %} + full_installer: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer.exe + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup {{ version }}.exe + stub_installer: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer-stub.exe + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup Stub {{ version }}.exe + package: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.zip + symbols: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip + {% endif %} + + {% if platform == "win64" %} + full_installer: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.installer.exe + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox Setup {{ version }}.exe + package: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.zip + symbols: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip + {% endif %} + + {% if platform == "linux-i686" %} + package: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.tar.bz2 + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.tar.bz2 + symbols: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip + {% endif %} + + {% if platform == "linux-x86_64" %} + package: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.tar.bz2 + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.tar.bz2 + symbols: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/firefox-{{ version }}.crashreporter-symbols.zip + {% endif %} + + {% if platform == "mac" %} + package: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.dmg + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox {{ version }}.dmg + symbols: + artifact: {{ artifact_base_url }}/firefox-{{ app_version }}.{{ locale }}.{{ platform }}.crashreporter-symbols.zip + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/{{ platform }}/{{ locale }}/Firefox {{ version }}.crashreporter-symbols.zip + {% endif %} + +{% endfor %} diff --git a/testing/mozharness/configs/beetmover/partials.yml.tmpl b/testing/mozharness/configs/beetmover/partials.yml.tmpl new file mode 100644 index 0000000000..0f4d2c353a --- /dev/null +++ b/testing/mozharness/configs/beetmover/partials.yml.tmpl @@ -0,0 +1,16 @@ +--- +metadata: + name: "Beet Mover Manifest" + description: "Maps artifact locations to s3 key names for partials" + owner: "release@mozilla.com" + +mapping: +{% for locale in locales %} + {{ locale }}: + partial_mar: + artifact: {{ artifact_base_url }}/firefox-{{ partial_version }}-{{ version }}.{{ locale }}.{{ platform }}.partial.mar + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ partial_version }}-{{ version }}.partial.mar + partial_mar_sig: + artifact: {{ artifact_base_url }}/firefox-{{ partial_version }}-{{ version }}.{{ locale }}.{{ platform }}.partial.mar.asc + s3_key: {{ s3_prefix }}/{{ version }}-candidates/{{ build_num }}/update/{{ platform }}/{{ locale }}/firefox-{{ partial_version }}-{{ version }}.partial.mar.asc +{% endfor %} diff --git a/testing/mozharness/external_tools/extract_and_run_command.py b/testing/mozharness/external_tools/extract_and_run_command.py new file mode 100644 index 0000000000..ab48ee1dff --- /dev/null +++ b/testing/mozharness/external_tools/extract_and_run_command.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python +"""\ +Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories] + -j is the number of workers to start, defaulting to 1. + [command to run] must be a command that can accept one or many files + to process as arguments. + +WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to + terminate it early. + """ + +### The canonical location for this file is +### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py +### +### Please update the copy in puppet to deploy new changes to +### stage.mozilla.org, see +# https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage + +import logging +import os +from os import path +import sys +from Queue import Queue +import shutil +import subprocess +import tempfile +from threading import Thread +import time + +logging.basicConfig( + stream=sys.stdout, level=logging.INFO, format="%(message)s") +log = logging.getLogger(__name__) + +try: + # the future - https://github.com/mozilla/build-mar via a venv + from mardor.marfile import BZ2MarFile +except: + # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py + sys.path.append( + path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils")) + from mar import BZ2MarFile + +SEVENZIP = "7za" + + +def extractMar(filename, tempdir): + m = BZ2MarFile(filename) + m.extractall(path=tempdir) + + +def extractExe(filename, tempdir): + try: + # We don't actually care about output, put we redirect to a tempfile + # to avoid deadlocking in wait() when stdout=PIPE + fd = tempfile.TemporaryFile() + proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename], + stdout=fd, stderr=subprocess.STDOUT) + proc.wait() + except subprocess.CalledProcessError: + # Not all EXEs are 7-zip files, so we have to ignore extraction errors + pass + +# The keys here are matched against the last 3 characters of filenames. +# The values are callables that accept two string arguments. +EXTRACTORS = { + '.mar': extractMar, + '.exe': extractExe, +} + + +def find_files(d): + """yields all of the files in `d'""" + for root, dirs, files in os.walk(d): + for f in files: + yield path.abspath(path.join(root, f)) + + +def rchmod(d, mode=0755): + """chmods everything in `d' to `mode', including `d' itself""" + os.chmod(d, mode) + for root, dirs, files in os.walk(d): + for item in dirs: + os.chmod(path.join(root, item), mode) + for item in files: + os.chmod(path.join(root, item), mode) + + +def maybe_extract(filename): + """If an extractor is found for `filename', extracts it to a temporary + directory and chmods it. The consumer is responsible for removing + the extracted files, if desired.""" + ext = path.splitext(filename)[1] + if ext not in EXTRACTORS.keys(): + return None + # Append the full filepath to the tempdir + tempdir_root = tempfile.mkdtemp() + tempdir = path.join(tempdir_root, filename.lstrip('/')) + os.makedirs(tempdir) + EXTRACTORS[ext](filename, tempdir) + rchmod(tempdir_root) + return tempdir_root + + +def process(item, command): + def format_time(t): + return time.strftime("%H:%M:%S", time.localtime(t)) + # Buffer output to avoid interleaving of multiple workers' + logs = [] + args = [item] + proc = None + start = time.time() + logs.append("START %s: %s" % (format_time(start), item)) + # If the file was extracted, we need to process all of its files, too. + tempdir = maybe_extract(item) + if tempdir: + for f in find_files(tempdir): + args.append(f) + + try: + fd = tempfile.TemporaryFile() + proc = subprocess.Popen(command + args, stdout=fd) + proc.wait() + if proc.returncode != 0: + raise Exception("returned %s" % proc.returncode) + finally: + if tempdir: + shutil.rmtree(tempdir) + fd.seek(0) + # rstrip() here to avoid an unnecessary newline, if it exists. + logs.append(fd.read().rstrip()) + end = time.time() + elapsed = end - start + logs.append("END %s (%d seconds elapsed): %s\n" % ( + format_time(end), elapsed, item)) + # Now that we've got all of our output, print it. It's important that + # the logging module is used for this, because "print" is not + # thread-safe. + log.info("\n".join(logs)) + + +def worker(command, errors): + item = q.get() + while item != None: + try: + process(item, command) + except: + errors.put(item) + item = q.get() + +if __name__ == '__main__': + # getopt is used in favour of optparse to enable "--" as a separator + # between the command and list of files. optparse doesn't allow that. + from getopt import getopt + options, args = getopt(sys.argv[1:], 'j:h', ['help']) + + concurrency = 1 + for o, a in options: + if o == '-j': + concurrency = int(a) + elif o in ('-h', '--help'): + log.info(__doc__) + sys.exit(0) + + if len(args) < 3 or '--' not in args: + log.error(__doc__) + sys.exit(1) + + command = [] + while args[0] != "--": + command.append(args.pop(0)) + args.pop(0) + + q = Queue() + errors = Queue() + threads = [] + for i in range(concurrency): + t = Thread(target=worker, args=(command, errors)) + t.start() + threads.append(t) + + # find_files is a generator, so work will begin prior to it finding + # all of the files + for arg in args: + if path.isfile(arg): + q.put(arg) + else: + for f in find_files(arg): + q.put(f) + # Because the workers are started before we start populating the q + # they can't use .empty() to determine whether or not their done. + # We also can't use q.join() or j.task_done(), because we need to + # support Python 2.4. We know that find_files won't yield None, + # so we can detect doneness by having workers die when they get None + # as an item. + for i in range(concurrency): + q.put(None) + + for t in threads: + t.join() + + if not errors.empty(): + log.error("Command failed for the following files:") + while not errors.empty(): + log.error(" %s" % errors.get()) + sys.exit(1) diff --git a/testing/mozharness/scripts/release/beet_mover.py b/testing/mozharness/scripts/release/beet_mover.py new file mode 100644 index 0000000000..4a0634acd1 --- /dev/null +++ b/testing/mozharness/scripts/release/beet_mover.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""beet_mover.py. + +downloads artifacts, scans them and uploads them to s3 +""" +import hashlib +import sys +import os +import pprint +import re +from os import listdir +from os.path import isfile, join + +sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0]))) +from mozharness.base.log import FATAL +from mozharness.base.python import VirtualenvMixin +from mozharness.base.script import BaseScript +import mozharness + + +def get_hash(content, hash_type="md5"): + h = hashlib.new(hash_type) + h.update(content) + return h.hexdigest() + + +def get_aws_auth(): + """ + retrieves aws creds and deletes them from os.environ if present. + """ + aws_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + + if aws_key_id and aws_secret_key: + del os.environ['AWS_ACCESS_KEY_ID'] + del os.environ['AWS_SECRET_ACCESS_KEY'] + else: + exit("could not determine aws credentials from os environment") + + return aws_key_id, aws_secret_key + + +CONFIG_OPTIONS = [ + [["--template"], { + "dest": "template", + "help": "Specify jinja2 template file", + }], + [['--locale', ], { + "action": "extend", + "dest": "locales", + "type": "string", + "help": "Specify the locale(s) to upload."}], + [["--platform"], { + "dest": "platform", + "help": "Specify the platform of the build", + }], + [["--version"], { + "dest": "version", + "help": "full release version based on gecko and tag/stage identifier. e.g. '44.0b1'" + }], + [["--app-version"], { + "dest": "app_version", + "help": "numbered version based on gecko. e.g. '44.0'" + }], + [["--partial-version"], { + "dest": "partial_version", + "help": "the partial version the mar is based off of" + }], + [["--artifact-subdir"], { + "dest": "artifact_subdir", + "default": 'build', + "help": "subdir location for taskcluster artifacts after public/ base.", + }], + [["--build-num"], { + "dest": "build_num", + "help": "the release build identifier" + }], + [["--taskid"], { + "dest": "taskid", + "help": "taskcluster task id to download artifacts from", + }], + [["--production"], { + "dest": "production", + "default": False, + "help": "taskcluster task id to download artifacts from", + }], + [["--exclude"], { + "dest": "excludes", + "action": "append", + "help": "List of filename patterns to exclude. See script source for default", + }], + [["-s", "--scan-parallelization"], { + "dest": "scan_parallelization", + "default": 4, + "type": "int", + "help": "Number of concurrent file scans", + }], +] + +DEFAULT_EXCLUDES = [ + r"^.*tests.*$", + r"^.*crashreporter.*$", + r"^.*\.zip(\.asc)?$", + r"^.*\.log$", + r"^.*\.txt$", + r"^.*\.asc$", + r"^.*/partner-repacks.*$", + r"^.*.checksums(\.asc)?$", + r"^.*/logs/.*$", + r"^.*/jsshell.*$", + r"^.*json$", + r"^.*/host.*$", + r"^.*/mar-tools/.*$", + r"^.*gecko-unsigned-unaligned.apk$", + r"^.*robocop.apk$", + r"^.*contrib.*" +] +CACHE_DIR = 'cache' + + +class BeetMover(BaseScript, VirtualenvMixin, object): + def __init__(self, aws_creds): + beetmover_kwargs = { + 'config_options': CONFIG_OPTIONS, + 'all_actions': [ + # 'clobber', + 'create-virtualenv', + 'activate-virtualenv', + 'generate-candidates-manifest', + 'verify-bits', # beets + 'download-bits', # beets + 'scan-bits', # beets + 'upload-bits', # beets + ], + 'require_config_file': False, + # Default configuration + 'config': { + # base index url where to find taskcluster artifact based on taskid + # TODO - find out if we need to support taskcluster run number other than 0. + # e.g. maybe we could end up with artifacts in > 'run 0' in a re-trigger situation? + "artifact_base_url": 'https://queue.taskcluster.net/v1/task/{taskid}/runs/0/artifacts/public/{subdir}', + "virtualenv_modules": [ + "boto", + "PyYAML", + "Jinja2", + "redo", + "mar", + ], + "virtualenv_path": "venv", + 'buckets': { + 'development': "mozilla-releng-beet-mover-dev", + 'production': "mozilla-releng-beet-mover", + }, + 'product': 'firefox', + }, + } + #todo do excludes need to be configured via command line for specific builds? + super(BeetMover, self).__init__(**beetmover_kwargs) + + c = self.config + self.manifest = {} + # assigned in _post_create_virtualenv + self.virtualenv_imports = None + self.bucket = c['buckets']['production'] if c['production'] else c['buckets']['development'] + self.aws_key_id, self.aws_secret_key = aws_creds + # if excludes is set from command line, use it otherwise use defaults + self.excludes = self.config.get('excludes', DEFAULT_EXCLUDES) + dirs = self.query_abs_dirs() + self.dest_dir = os.path.join(dirs['abs_work_dir'], CACHE_DIR) + + def activate_virtualenv(self): + """ + activates virtualenv and adds module imports to a instance wide namespace. + + creating and activating a virtualenv onto the currently executing python interpreter is a + bit black magic. Rather than having import statements added in various places within the + script, we import them here immediately after we activate the newly created virtualenv + """ + VirtualenvMixin.activate_virtualenv(self) + + import boto + import yaml + import jinja2 + self.virtualenv_imports = { + 'boto': boto, + 'yaml': yaml, + 'jinja2': jinja2, + } + self.log("activated virtualenv with the modules: {}".format(str(self.virtualenv_imports))) + + def generate_candidates_manifest(self): + """ + generates and outputs a manifest that maps expected Taskcluster artifact names + to release deliverable names + """ + self.log('generating manifest from {}...'.format(self.config['template'])) + template_dir, template_file = os.path.split(os.path.abspath(self.config['template'])) + jinja2 = self.virtualenv_imports['jinja2'] + yaml = self.virtualenv_imports['yaml'] + + jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir), + undefined=jinja2.StrictUndefined) + template = jinja_env.get_template(template_file) + template_vars = { + "platform": self.config['platform'], + "locales": self.config['locales'], + "version": self.config['version'], + "app_version": self.config.get('app_version', ''), + "partial_version": self.config.get('partial_version', ''), + "build_num": self.config['build_num'], + # mirror current release folder structure + "s3_prefix": 'pub/{}/candidates'.format(self.config['product']), + "artifact_base_url": self.config['artifact_base_url'].format( + taskid=self.config['taskid'], subdir=self.config['artifact_subdir'] + ) + } + self.manifest = yaml.safe_load(template.render(**template_vars)) + + self.log("manifest generated:") + self.log(pprint.pformat(self.manifest['mapping'])) + + def verify_bits(self): + """ + inspects each artifact and verifies that they were created by trustworthy tasks + """ + # TODO + self.log('skipping verification. unimplemented...') + + def download_bits(self): + """ + downloads list of artifacts to self.dest_dir dir based on a given manifest + """ + self.log('downloading and uploading artifacts to self_dest_dir...') + + # TODO - do we want to mirror/upload to more than one region? + dirs = self.query_abs_dirs() + + for locale in self.manifest['mapping']: + for deliverable in self.manifest['mapping'][locale]: + self.log("downloading '{}' deliverable for '{}' locale".format(deliverable, locale)) + # download locally to working dir + source=self.manifest['mapping'][locale][deliverable]['artifact'] + file_name = self.retry(self.download_file, + args=[source], + kwargs={'parent_dir': dirs['abs_work_dir']}, + error_level=FATAL) + self.log('Success!') + + def upload_bits(self): + """ + uploads list of artifacts to s3 candidates dir based on a given manifest + """ + self.log('uploading artifacts to s3...') + dirs = self.query_abs_dirs() + + # connect to s3 + boto = self.virtualenv_imports['boto'] + conn = boto.connect_s3(self.aws_key_id, self.aws_secret_key) + bucket = conn.get_bucket(self.bucket) + + #todo change so this is not every entry in manifest - should exclude those that don't pass virus sign + #not sure how to determine this + for locale in self.manifest['mapping']: + for deliverable in self.manifest['mapping'][locale]: + self.log("uploading '{}' deliverable for '{}' locale".format(deliverable, locale)) + #we have already downloaded the files locally so we can use that version + source = self.manifest['mapping'][locale][deliverable]['artifact'] + downloaded_file = os.path.join(dirs['abs_work_dir'], self.get_filename_from_url(source)) + self.upload_bit( + source=downloaded_file, + s3_key=self.manifest['mapping'][locale][deliverable]['s3_key'], + bucket=bucket, + ) + self.log('Success!') + + + def upload_bit(self, source, s3_key, bucket): + # TODO - do we want to mirror/upload to more than one region? + dirs = self.query_abs_dirs() + boto = self.virtualenv_imports['boto'] + + #todo need to copy from dir to s3 + + self.info('uploading to s3 with key: {}'.format(s3_key)) + key = boto.s3.key.Key(bucket) # create new key + key.key = s3_key # set key name + + self.info("Checking if `{}` already exists".format(s3_key)) + key = bucket.get_key(s3_key) + if not key: + self.info("Uploading to `{}`".format(s3_key)) + key = bucket.new_key(s3_key) + + # set key value + self.retry(key.set_contents_from_filename, args=[source], error_level=FATAL), + + # key.make_public() may lead to race conditions, because + # it doesn't pass version_id, so it may not set permissions + bucket.set_canned_acl(acl_str='public-read', key_name=s3_key, + version_id=key.version_id) + else: + if not get_hash(key.get_contents_as_string()) == get_hash(open(source).read()): + # for now, let's halt. If necessary, we can revisit this and allow for overwrites + # to the same buildnum release with different bits + self.fatal("`{}` already exists with different checksum.".format(s3_key)) + self.log("`{}` has the same MD5 checksum, not uploading".format(s3_key)) + + def scan_bits(self): + + dirs = self.query_abs_dirs() + + filenames = [f for f in listdir(dirs['abs_work_dir']) if isfile(join(dirs['abs_work_dir'], f))] + self.mkdir_p(self.dest_dir) + for file_name in filenames: + if self._matches_exclude(file_name): + self.info("Excluding {} from virus scan".format(file_name)) + else: + self.info('Copying {} to {}'.format(file_name,self.dest_dir)) + self.copyfile(os.path.join(dirs['abs_work_dir'], file_name), os.path.join(self.dest_dir,file_name)) + self._scan_files() + self.info('Emptying {}'.format(self.dest_dir)) + self.rmtree(self.dest_dir) + + def _scan_files(self): + """Scan the files we've collected. We do the download and scan concurrently to make + it easier to have a coherent log afterwards. Uses the venv python.""" + external_tools_path = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), 'external_tools') + self.run_command([self.query_python_path(), os.path.join(external_tools_path,'extract_and_run_command.py'), + '-j{}'.format(self.config['scan_parallelization']), + 'clamscan', '--no-summary', '--', self.dest_dir]) + + def _matches_exclude(self, keyname): + return any(re.search(exclude, keyname) for exclude in self.excludes) + +if __name__ == '__main__': + beet_mover = BeetMover(get_aws_auth()) + beet_mover.run_and_exit() diff --git a/xpcom/threads/LazyIdleThread.h b/xpcom/threads/LazyIdleThread.h index 44ca824d0a..77b38ce8b3 100644 --- a/xpcom/threads/LazyIdleThread.h +++ b/xpcom/threads/LazyIdleThread.h @@ -45,10 +45,7 @@ public: NS_DECL_NSITIMERCALLBACK NS_DECL_NSITHREADOBSERVER NS_DECL_NSIOBSERVER - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; enum ShutdownMethod { diff --git a/xpcom/threads/SharedThreadPool.h b/xpcom/threads/SharedThreadPool.h index 8212647991..8a90dbcd38 100644 --- a/xpcom/threads/SharedThreadPool.h +++ b/xpcom/threads/SharedThreadPool.h @@ -54,10 +54,6 @@ public: // Forward behaviour to wrapped thread pool implementation. NS_FORWARD_SAFE_NSITHREADPOOL(mPool); - // See bug 1155059 - MSVC forces us to not declare Dispatch normally in idl - // NS_FORWARD_SAFE_NSIEVENTTARGET(mEventTarget); - nsresult Dispatch(nsIRunnable *event, uint32_t flags) { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->Dispatch(event, flags); } - NS_IMETHOD DispatchFromScript(nsIRunnable *event, uint32_t flags) override { return Dispatch(event, flags); } @@ -65,6 +61,8 @@ public: NS_IMETHOD Dispatch(already_AddRefed&& event, uint32_t flags) override { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->Dispatch(Move(event), flags); } + using nsIEventTarget::Dispatch; + NS_IMETHOD IsOnCurrentThread(bool *_retval) override { return !mEventTarget ? NS_ERROR_NULL_POINTER : mEventTarget->IsOnCurrentThread(_retval); } // Creates necessary statics. Called once at startup. diff --git a/xpcom/threads/nsIEventTarget.idl b/xpcom/threads/nsIEventTarget.idl index bfff6d171f..c3e4d3c329 100644 --- a/xpcom/threads/nsIEventTarget.idl +++ b/xpcom/threads/nsIEventTarget.idl @@ -13,15 +13,10 @@ native alreadyAddRefed_nsIRunnable(already_AddRefed&&); -[scriptable, uuid(f9d60700-e6dc-4a72-9537-689058655472)] +[scriptable, uuid(88145945-3278-424e-9f37-d874cbdd9f6f)] interface nsIEventTarget : nsISupports { /* until we can get rid of all uses, keep the non-alreadyAddRefed<> version */ - /** - * This must be non-virtual due to issues with MSVC 2013's ordering of - * vtbls for overloads. With other platforms we can leave this virtual - * and avoid adding lots of Dispatch() methods to classes inheriting this. - */ %{C++ nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); diff --git a/xpcom/threads/nsIThread.idl b/xpcom/threads/nsIThread.idl index e9bd22c59c..36fd9af152 100644 --- a/xpcom/threads/nsIThread.idl +++ b/xpcom/threads/nsIThread.idl @@ -17,7 +17,7 @@ * * See nsIThreadManager for the API used to create and locate threads. */ -[scriptable, uuid(594feb13-6164-4054-b5a1-ad62e10ea15d)] +[scriptable, uuid(5801d193-29d1-4964-a6b7-70eb697ddf2b)] interface nsIThread : nsIEventTarget { /** diff --git a/xpcom/threads/nsIThreadInternal.idl b/xpcom/threads/nsIThreadInternal.idl index 9287bf5d79..e5d7cc83ff 100644 --- a/xpcom/threads/nsIThreadInternal.idl +++ b/xpcom/threads/nsIThreadInternal.idl @@ -13,7 +13,7 @@ interface nsIThreadObserver; * The XPCOM thread object implements this interface, which allows a consumer * to observe dispatch activity on the thread. */ -[scriptable, uuid(9cc51754-2eb3-4b46-ae99-38a61881c622)] +[scriptable, uuid(a3a72e5f-71d9-4add-8f30-59a78fb6d5eb)] interface nsIThreadInternal : nsIThread { /** diff --git a/xpcom/threads/nsIThreadPool.idl b/xpcom/threads/nsIThreadPool.idl index 77a124a85e..d04e8504a6 100644 --- a/xpcom/threads/nsIThreadPool.idl +++ b/xpcom/threads/nsIThreadPool.idl @@ -27,7 +27,7 @@ interface nsIThreadPoolListener : nsISupports * anonymous (unnamed) worker threads. An event dispatched to the thread pool * will be run on the next available worker thread. */ -[scriptable, uuid(cacd4a2e-2655-4ff8-894c-10c15883cd0a)] +[scriptable, uuid(76ce99c9-8e43-489a-9789-f27cc4424965)] interface nsIThreadPool : nsIEventTarget { /** diff --git a/xpcom/threads/nsThread.h b/xpcom/threads/nsThread.h index b2ba05b360..e1abae918c 100644 --- a/xpcom/threads/nsThread.h +++ b/xpcom/threads/nsThread.h @@ -33,10 +33,7 @@ public: NS_DECL_NSITHREAD NS_DECL_NSITHREADINTERNAL NS_DECL_NSISUPPORTSPRIORITY - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; enum MainThreadFlag { diff --git a/xpcom/threads/nsThreadPool.h b/xpcom/threads/nsThreadPool.h index b32ad191a8..9552738b36 100644 --- a/xpcom/threads/nsThreadPool.h +++ b/xpcom/threads/nsThreadPool.h @@ -28,10 +28,7 @@ public: NS_DECL_NSIEVENTTARGET NS_DECL_NSITHREADPOOL NS_DECL_NSIRUNNABLE - // missing from NS_DECL_NSIEVENTTARGET because MSVC - nsresult Dispatch(nsIRunnable* aEvent, uint32_t aFlags) { - return Dispatch(nsCOMPtr(aEvent).forget(), aFlags); - } + using nsIEventTarget::Dispatch; nsThreadPool();