import from UXP: Issue #1890 - Update libopus (a8b1099a)

This commit is contained in:
2022-05-07 07:13:26 +08:00
parent fefb8accee
commit 284b2925de
229 changed files with 10999 additions and 5730 deletions
+2 -2
View File
@@ -11,10 +11,10 @@ celt_pitch_xcorr_arm-gnu.s: celt/arm/armopts-gnu.S
# armopts needs a specific rule, because arm2gnu.pl will always add the .S
# suffix when translating the files that include it.
celt/arm/armopts-gnu.S: celt/arm/armopts.s $(call mkdir_deps,celt/arm) $(GLOBAL_DEPS)
celt/arm/armopts-gnu.S: $(srcdir)/celt/arm/armopts.s $(call mkdir_deps,celt/arm) $(GLOBAL_DEPS)
$(PERL) $(srcdir)/celt/arm/arm2gnu.pl < $< > $@
# For all others, we can use an implicit rule
%-gnu.s: celt/arm/%.s $(GLOBAL_DEPS)
%-gnu.s: $(srcdir)/celt/arm/%.s $(GLOBAL_DEPS)
$(PERL) $(srcdir)/celt/arm/arm2gnu.pl < $< > $@
endif
@@ -6,6 +6,8 @@ Any changes made to this version of the source should
be reflected in that script, e.g. by applying patch
files after the copy step.
The upstream repository is https://git.xiph.org/opus.git
The upstream repository is https://github.com/xiph/opus.git
Official master repository is https://gitlab.xiph.org/xiph/opus.git
Release: commit 2654707e86cc94413998976d179b2ab4a2aa3114 (2022-04-01T14:32:38.000-04:00)
The git tag/revision used was v1.1.4.
+8 -8
View File
@@ -58,12 +58,12 @@
# define S_MUL(a,b) MULT16_32_Q15(b, a)
# define C_MUL(m,a,b) \
do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
# define C_MULC(m,a,b) \
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
@@ -77,17 +77,17 @@
DIVSCALAR( (c).i , div); }while (0)
#define C_ADD( res, a,b)\
do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
}while(0)
#define C_SUB( res, a,b)\
do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
}while(0)
#define C_ADDTO( res , a)\
do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
}while(0)
#define C_SUBFROM( res , a)\
do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
}while(0)
#if defined(OPUS_ARM_INLINE_ASM)
+47 -8
View File
@@ -46,25 +46,53 @@
# endif
# endif
#if OPUS_GNUC_PREREQ(3, 0)
#define opus_likely(x) (__builtin_expect(!!(x), 1))
#define opus_unlikely(x) (__builtin_expect(!!(x), 0))
#else
#define opus_likely(x) (!!(x))
#define opus_unlikely(x) (!!(x))
#endif
#define CELT_SIG_SCALE 32768.f
#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
#ifdef ENABLE_ASSERTIONS
#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__);
#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING)
#ifdef __GNUC__
__attribute__((noreturn))
#endif
void celt_fatal(const char *str, const char *file, int line);
#if defined(CELT_C) && !defined(OVERRIDE_celt_fatal)
#include <stdio.h>
#include <stdlib.h>
#ifdef __GNUC__
__attribute__((noreturn))
#endif
static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
void celt_fatal(const char *str, const char *file, int line)
{
fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
#if defined(_MSC_VER)
_set_abort_behavior( 0, _WRITE_ABORT_MSG);
#endif
abort();
}
#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
#endif
#define celt_assert(cond) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond);}}
#define celt_assert2(cond, message) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond "\n" message);}}
#define MUST_SUCCEED(call) celt_assert((call) == OPUS_OK)
#else
#define celt_assert(cond)
#define celt_assert2(cond, message)
#define MUST_SUCCEED(call) do {if((call) != OPUS_OK) {RESTORE_STACK; return OPUS_INTERNAL_ERROR;} } while (0)
#endif
#if defined(ENABLE_ASSERTIONS)
#define celt_sig_assert(cond) {if (!(cond)) {CELT_FATAL("signal assertion failed: " #cond);}}
#else
#define celt_sig_assert(cond)
#endif
#define IMUL32(a,b) ((a)*(b))
@@ -93,14 +121,20 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
typedef opus_int16 opus_val16;
typedef opus_int32 opus_val32;
typedef opus_int64 opus_val64;
typedef opus_val32 celt_sig;
typedef opus_val16 celt_norm;
typedef opus_val32 celt_ener;
#define celt_isnan(x) 0
#define Q15ONE 32767
#define SIG_SHIFT 12
/* Safe saturation value for 32-bit signals. Should be less than
2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
#define SIG_SAT (300000000)
#define NORM_SCALING 16384
@@ -129,7 +163,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/fixed_arm64.h"
#elif OPUS_ARM_INLINE_EDSP
#elif defined (OPUS_ARM_INLINE_EDSP)
#include "arm/fixed_armv5e.h"
#elif defined (OPUS_ARM_INLINE_ASM)
#include "arm/fixed_armv4.h"
@@ -147,6 +181,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
typedef float opus_val16;
typedef float opus_val32;
typedef float opus_val64;
typedef float celt_sig;
typedef float celt_norm;
@@ -186,6 +221,7 @@ static OPUS_INLINE int celt_isnan(float x)
#define NEG16(x) (-(x))
#define NEG32(x) (-(x))
#define NEG32_ovflw(x) (-(x))
#define EXTRACT16(x) (x)
#define EXTEND32(x) (x)
#define SHR16(a,shift) (a)
@@ -202,6 +238,7 @@ static OPUS_INLINE int celt_isnan(float x)
#define SATURATE16(x) (x)
#define ROUND16(a,shift) (a)
#define SROUND16(a,shift) (a)
#define HALF16(x) (.5f*(x))
#define HALF32(x) (.5f*(x))
@@ -209,6 +246,8 @@ static OPUS_INLINE int celt_isnan(float x)
#define SUB16(a,b) ((a)-(b))
#define ADD32(a,b) ((a)+(b))
#define SUB32(a,b) ((a)-(b))
#define ADD32_ovflw(a,b) ((a)+(b))
#define SUB32_ovflw(a,b) ((a)-(b))
#define MULT16_16_16(a,b) ((a)*(b))
#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
@@ -243,9 +282,9 @@ static OPUS_INLINE int celt_isnan(float x)
#ifndef GLOBAL_STACK_SIZE
#ifdef FIXED_POINT
#define GLOBAL_STACK_SIZE 100000
#define GLOBAL_STACK_SIZE 120000
#else
#define GLOBAL_STACK_SIZE 100000
#define GLOBAL_STACK_SIZE 120000
#endif
#endif
+2 -2
View File
@@ -164,11 +164,11 @@ while (<>) {
$prefix = "";
if ($proc)
{
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
$prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple);
# Make sure we $prefix isn't empty here (for the $apple case).
# We handle mangling the label here, make sure it doesn't match
# the label handling below (if $prefix would be empty).
$prefix = "; ";
$prefix = $prefix."; ";
push(@proc_stack, $proc);
s/^[A-Za-z_\.]\w+/$symprefix$&:/;
}
+19 -2
View File
@@ -35,12 +35,29 @@
#if defined(OPUS_HAVE_RTCD)
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
celt_inner_prod_neon /* NEON */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2) = {
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
dual_inner_prod_neon /* NEON */
};
# endif
# if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = {
const opus_val16 *, opus_val32 *, int, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
@@ -51,7 +68,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
# else /* !FIXED_POINT */
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
const opus_val16 *, opus_val32 *, int, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
+2
View File
@@ -93,6 +93,8 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
#elif defined(__linux__)
/* Linux based */
#include <stdio.h>
opus_uint32 opus_cpu_capabilities(void)
{
opus_uint32 flags = 0;
@@ -1,7 +1,7 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_fft.c
@file celt_fft_ne10.c
@brief ARM Neon optimizations for fft using NE10 library
*/
@@ -36,7 +36,6 @@
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"
@@ -1,7 +1,7 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_mdct.c
@file celt_mdct_ne10.c
@brief ARM Neon optimizations for mdct using NE10 library
*/
+5 -105
View File
@@ -191,121 +191,21 @@ static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
vst1q_f32(sum, SUMM);
}
/*
* Function: xcorr_kernel_neon_float_process1
* ---------------------------------
* Computes single correlation values and stores in *sum
*/
static void xcorr_kernel_neon_float_process1(const float32_t *x,
const float32_t *y, float32_t *sum, int len) {
float32x4_t XX[4];
float32x4_t YY[4];
float32x2_t XX_2;
float32x2_t YY_2;
float32x4_t SUMM;
float32x2_t SUMM_2[2];
const float32_t *xi = x;
const float32_t *yi = y;
SUMM = vdupq_n_f32(0);
/* Work on 16 values per iteration */
while (len >= 16) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
XX[2] = vld1q_f32(xi);
xi += 4;
XX[3] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
yi += 4;
YY[3] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
len -= 16;
}
/* Work on 8 values */
if (len >= 8) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
len -= 8;
}
/* Work on 4 values */
if (len >= 4) {
XX[0] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
len -= 4;
}
/* Start accumulating results */
SUMM_2[0] = vget_low_f32(SUMM);
if (len >= 2) {
/* While at it, consume 2 more values if available */
XX_2 = vld1_f32(xi);
xi += 2;
YY_2 = vld1_f32(yi);
yi += 2;
SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
len -= 2;
}
SUMM_2[1] = vget_high_f32(SUMM);
SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
/* Ok, now we have result accumulated in SUMM_2[0].0 */
if (len > 0) {
/* Case when you have one value left */
XX_2 = vld1_dup_f32(xi);
YY_2 = vld1_dup_f32(yi);
SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
}
vst1_lane_f32(sum, SUMM_2[0], 0);
}
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch) {
opus_val32 *xcorr, int len, int max_pitch, int arch) {
int i;
(void)arch;
celt_assert(max_pitch > 0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i = 0; i < (max_pitch-3); i += 4) {
xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
(float32_t *)xcorr+i, len);
}
/* In case max_pitch isn't multiple of 4
* compute single correlation value per iteration
*/
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for (; i < max_pitch; i++) {
xcorr_kernel_neon_float_process1((const float32_t *)_x,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
xcorr[i] = celt_inner_prod_neon(_x, _y+i, len);
}
}
#endif
@@ -153,7 +153,7 @@ xcorr_kernel_neon_process1
ENDP
; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
; opus_val32 *xcorr, int len, int max_pitch)
; opus_val32 *xcorr, int len, int max_pitch, int arch)
celt_pitch_xcorr_neon PROC
; input:
; r0 = opus_val16 *_x
@@ -168,6 +168,8 @@ celt_pitch_xcorr_neon PROC
; r6 = int max_pitch
; r12 = int j
; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
; ignored:
; int arch
STMFD sp!, {r4-r6, lr}
LDR r6, [sp, #16]
VMOV.S32 q15, #1
@@ -358,6 +360,8 @@ celt_pitch_xcorr_edsp PROC
; r9 = opus_val32 sum3
; r1 = int max_pitch
; r12 = int j
; ignored:
; int arch
STMFD sp!, {r4-r11, lr}
MOV r5, r1
LDR r1, [sp, #36]
-1
View File
@@ -34,7 +34,6 @@
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if defined(HAVE_ARM_NE10)
+3 -3
View File
@@ -37,7 +37,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
"#MULT16_32_Q16\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b),"r"(a<<16)
: "%r"(b),"r"(SHL32(a,16))
);
return rd_hi;
}
@@ -54,10 +54,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
"#MULT16_32_Q15\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(a<<16)
: "%r"(b), "r"(SHL32(a,16))
);
/*We intentionally don't OR in the high bit of rd_lo for speed.*/
return rd_hi<<1;
return SHL32(rd_hi,1);
}
#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+2 -2
View File
@@ -59,7 +59,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
: "=r"(res)
: "r"(b), "r"(a)
);
return res<<1;
return SHL32(res,1);
}
#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
@@ -76,7 +76,7 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
"#MAC16_32_Q15\n\t"
"smlawb %0, %1, %2, %3;\n"
: "=r"(res)
: "r"(b<<1), "r"(a), "r"(c)
: "r"(SHL32(b,1)), "r"(a), "r"(c)
);
return res;
}
-1
View File
@@ -33,7 +33,6 @@
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if defined(HAVE_ARM_NE10)
+45 -11
View File
@@ -30,11 +30,47 @@
# include "armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N);
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01,
const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_CELT_INNER_PROD (1)
# define OVERRIDE_DUAL_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N))
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2))
# endif
# endif
# if !defined(OVERRIDE_CELT_INNER_PROD)
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N);
# define OVERRIDE_CELT_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_CELT_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N))
# endif
# endif
# if !defined(OVERRIDE_DUAL_INNER_PROD)
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x,
const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
# define OVERRIDE_DUAL_INNER_PROD (1)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_DUAL_INNER_PROD (1)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2))
# endif
# endif
# if defined(FIXED_POINT)
# if defined(OPUS_ARM_MAY_HAVE_NEON)
opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
@@ -43,7 +79,7 @@ opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
# if defined(OPUS_HAVE_RTCD) && \
@@ -52,18 +88,17 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
const opus_val16 *, opus_val32 *, int, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
xcorr, len, max_pitch, arch))
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
# endif
@@ -99,25 +134,24 @@ extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
/* Float case */
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
#endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
const opus_val16 *, opus_val32 *, int, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
xcorr, len, max_pitch, arch))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# define celt_pitch_xcorr celt_pitch_xcorr_float_neon
# endif
+290
View File
@@ -0,0 +1,290 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "pitch.h"
#ifdef FIXED_POINT
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy;
int16x8_t x_s16x8, y_s16x8;
int32x4_t xy_s32x4 = vdupq_n_s32(0);
int64x2_t xy_s64x2;
int64x1_t xy_s64x1;
for (i = 0; i < N - 7; i += 8) {
x_s16x8 = vld1q_s16(&x[i]);
y_s16x8 = vld1q_s16(&y[i]);
xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
}
if (N - i >= 4) {
const int16x4_t x_s16x4 = vld1_s16(&x[i]);
const int16x4_t y_s16x4 = vld1_s16(&y[i]);
xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
i += 4;
}
xy_s64x2 = vpaddlq_s32(xy_s32x4);
xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
xy = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
#ifdef OPUS_CHECK_ASM
celt_assert(celt_inner_prod_c(x, y, N) == xy);
#endif
return xy;
}
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02;
int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
int32x4_t xy01_s32x4 = vdupq_n_s32(0);
int32x4_t xy02_s32x4 = vdupq_n_s32(0);
int64x2_t xy01_s64x2, xy02_s64x2;
int64x1_t xy01_s64x1, xy02_s64x1;
for (i = 0; i < N - 7; i += 8) {
x_s16x8 = vld1q_s16(&x[i]);
y01_s16x8 = vld1q_s16(&y01[i]);
y02_s16x8 = vld1q_s16(&y02[i]);
xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
}
if (N - i >= 4) {
const int16x4_t x_s16x4 = vld1_s16(&x[i]);
const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
i += 4;
}
xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
xy01 = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
xy02 = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
#ifdef OPUS_CHECK_ASM
{
opus_val32 xy1_c, xy2_c;
dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
celt_assert(xy1_c == *xy1);
celt_assert(xy2_c == *xy2);
}
#endif
}
#else /* !FIXED_POINT */
/* ========================================================================== */
#ifdef OPUS_CHECK_ASM
/* This part of code simulates floating-point NEON operations. */
/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point */
/* operations of celt_inner_prod_neon(), and both functions should have bit */
/* exact output. */
static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0;
for (i = 0; i < N - 3; i += 4) {
xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
}
xy0 += xy2;
xy1 += xy3;
xy = xy0 + xy1;
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
return xy;
}
/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point */
/* operations of dual_inner_prod_neon(), and both functions should have bit */
/* exact output. */
static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0;
for (i = 0; i < N - 3; i += 4) {
xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]);
xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]);
xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]);
xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]);
xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]);
xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]);
xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]);
xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]);
}
xy01_0 += xy01_2;
xy02_0 += xy02_2;
xy01_1 += xy01_3;
xy02_1 += xy02_3;
xy01 = xy01_0 + xy01_1;
xy02 = xy02_0 + xy02_1;
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
}
#endif /* OPUS_CHECK_ASM */
/* ========================================================================== */
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy;
float32x4_t xy_f32x4 = vdupq_n_f32(0);
float32x2_t xy_f32x2;
for (i = 0; i < N - 7; i += 8) {
float32x4_t x_f32x4, y_f32x4;
x_f32x4 = vld1q_f32(&x[i]);
y_f32x4 = vld1q_f32(&y[i]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
x_f32x4 = vld1q_f32(&x[i + 4]);
y_f32x4 = vld1q_f32(&y[i + 4]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
}
if (N - i >= 4) {
const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
i += 4;
}
xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
xy = vget_lane_f32(xy_f32x2, 0);
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
#ifdef OPUS_CHECK_ASM
celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL);
#endif
return xy;
}
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02;
float32x4_t xy01_f32x4 = vdupq_n_f32(0);
float32x4_t xy02_f32x4 = vdupq_n_f32(0);
float32x2_t xy01_f32x2, xy02_f32x2;
for (i = 0; i < N - 7; i += 8) {
float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
x_f32x4 = vld1q_f32(&x[i]);
y01_f32x4 = vld1q_f32(&y01[i]);
y02_f32x4 = vld1q_f32(&y02[i]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
x_f32x4 = vld1q_f32(&x[i + 4]);
y01_f32x4 = vld1q_f32(&y01[i + 4]);
y02_f32x4 = vld1q_f32(&y02[i + 4]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
}
if (N - i >= 4) {
const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
i += 4;
}
xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
xy01 = vget_lane_f32(xy01_f32x2, 0);
xy02 = vget_lane_f32(xy02_f32x2, 0);
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
#ifdef OPUS_CHECK_ASM
{
opus_val32 xy1_c, xy2_c;
dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c);
celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL);
celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL);
}
#endif
}
#endif /* FIXED_POINT */
+236 -94
View File
@@ -65,19 +65,19 @@ opus_uint32 celt_lcg_rand(opus_uint32 seed)
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
with this approximation is important because it has an impact on the bit allocation */
static opus_int16 bitexact_cos(opus_int16 x)
opus_int16 bitexact_cos(opus_int16 x)
{
opus_int32 tmp;
opus_int16 x2;
tmp = (4096+((opus_int32)(x)*(x)))>>13;
celt_assert(tmp<=32767);
celt_sig_assert(tmp<=32767);
x2 = tmp;
x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
celt_assert(x2<=32766);
celt_sig_assert(x2<=32766);
return 1+x2;
}
static int bitexact_log2tan(int isin,int icos)
int bitexact_log2tan(int isin,int icos)
{
int lc;
int ls;
@@ -92,10 +92,11 @@ static int bitexact_log2tan(int isin,int icos)
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
(void)arch;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
@@ -155,7 +156,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
#else /* FIXED_POINT */
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
@@ -164,7 +165,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
for (i=0;i<end;i++)
{
opus_val32 sum;
sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
bandE[i+c*m->nbEBands] = celt_sqrt(sum);
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/
}
@@ -224,9 +225,9 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
#endif
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
#ifndef FIXED_POINT
g = celt_exp2(lg);
g = celt_exp2(MIN32(32.f, lg));
#else
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
@@ -241,12 +242,12 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
if (shift < -2)
/* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
capping the gain here, which is equivalent to a cap of 18 on lg.
This shouldn't trigger unless the bitstream is already corrupted. */
if (shift <= -2)
{
g = 32767;
g = 16384;
shift = -2;
}
do {
@@ -281,7 +282,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
N0 = m->eBands[i+1]-m->eBands[i];
/* depth in 1/8 bits */
celt_assert(pulses[i]>=0);
celt_sig_assert(pulses[i]>=0);
depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
#ifdef FIXED_POINT
@@ -360,6 +361,30 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
}
}
/* Compute the weights to use for optimizing normalized distortion across
channels. We use the amplitude to weight square distortion, which means
that we use the square root of the value we would have been using if we
wanted to minimize the MSE in the non-normalized domain. This roughly
corresponds to some quick-and-dirty perceptual experiments I ran to
measure inter-aural masking (there doesn't seem to be any published data
on the topic). */
static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
{
celt_ener minE;
#ifdef FIXED_POINT
int shift;
#endif
minE = MIN32(Ex, Ey);
/* Adjustment to make the weights a bit more conservative. */
Ex = ADD32(Ex, minE/3);
Ey = ADD32(Ey, minE/3);
#ifdef FIXED_POINT
shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
#endif
w[0] = VSHR32(Ex, shift);
w[1] = VSHR32(Ey, shift);
}
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
{
int i = bandID;
@@ -453,7 +478,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
/* Decide whether we should spread the pulses in the current frame */
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M)
int end, int C, int M, const int *spread_weight)
{
int i, c, N0;
int sum = 0, nbBands=0;
@@ -494,8 +519,8 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
if (i>m->nbEBands-4)
hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
sum += tmp*256;
nbBands++;
sum += tmp*spread_weight[i];
nbBands+=spread_weight[i];
}
} while (++c<C);
@@ -519,7 +544,7 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
/*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
celt_assert(nbBands>0); /* end has to be non-zero */
celt_assert(sum>=0);
sum = celt_udiv(sum, nbBands);
sum = celt_udiv((opus_int32)sum<<8, nbBands);
/* Recursive averaging */
sum = (sum+*average)>>1;
*average = sum;
@@ -647,6 +672,7 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
struct band_ctx {
int encode;
int resynth;
const CELTMode *m;
int i;
int intensity;
@@ -657,6 +683,9 @@ struct band_ctx {
const celt_ener *bandE;
opus_uint32 seed;
int arch;
int theta_round;
int disable_inv;
int avoid_split_noise;
};
struct split_ctx {
@@ -714,8 +743,35 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
if (qn!=1)
{
if (encode)
itheta = (itheta*(opus_int32)qn+8192)>>14;
{
if (!stereo || ctx->theta_round == 0)
{
itheta = (itheta*(opus_int32)qn+8192)>>14;
if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
{
/* Check if the selected value of theta will cause the bit allocation
to inject noise on one side. If so, make sure the energy of that side
is zero. */
int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
imid = bitexact_cos((opus_int16)unquantized);
iside = bitexact_cos((opus_int16)(16384-unquantized));
delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
if (delta > *b)
itheta = qn;
else if (delta < -*b)
itheta = 0;
}
} else {
int down;
/* Bias quantization towards itheta=0 and itheta=16384. */
int bias = itheta > 8192 ? 32767/qn : -32767/qn;
down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
if (ctx->theta_round < 0)
itheta = down;
else
itheta = down+1;
}
}
/* Entropy coding of the angle. We use a uniform pdf for the
time split, a step for stereo, and a triangular one for the rest. */
if (stereo && N>2)
@@ -793,7 +849,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
} else if (stereo) {
if (encode)
{
inv = itheta > 8192;
inv = itheta > 8192 && !ctx->disable_inv;
if (inv)
{
int j;
@@ -810,6 +866,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
inv = ec_dec_bit_logp(ec, 2);
} else
inv = 0;
/* inv flag override to avoid problems with downmixing. */
if (ctx->disable_inv)
inv = 0;
itheta = 0;
}
qalloc = ec_tell_frac(ec) - tell;
@@ -842,14 +901,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
sctx->itheta = itheta;
sctx->qalloc = qalloc;
}
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
celt_norm *lowband_out)
{
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int c;
int stereo;
celt_norm *x = X;
@@ -872,9 +926,8 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
sign = ec_dec_bits(ec, 1);
}
ctx->remaining_bits -= 1<<BITRES;
b-=1<<BITRES;
}
if (resynth)
if (ctx->resynth)
x[0] = sign ? -NORM_SCALING : NORM_SCALING;
x = Y;
} while (++c<1+stereo);
@@ -899,11 +952,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
int B0=B;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
celt_norm *Y=NULL;
int encode;
const CELTMode *m;
@@ -935,8 +983,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
fill = (fill&1)|(fill<<1);
B = (B+1)>>1;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
LM, 0, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
imid = sctx.imid;
iside = sctx.iside;
delta = sctx.delta;
@@ -970,24 +1017,20 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
rebalance = ctx->remaining_bits;
if (mbits >= sbits)
{
cm = quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
} else {
cm = quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
}
} else {
@@ -1012,18 +1055,14 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
/* Finally do the actual quantization */
if (encode)
{
cm = alg_quant(X, N, K, spread, B, ec
#ifdef RESYNTH
, gain
#endif
);
cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
} else {
cm = alg_unquant(X, N, K, spread, B, ec, gain);
}
} else {
/* If there's no pulse, fill the band anyway */
int j;
if (resynth)
if (ctx->resynth)
{
unsigned cm_mask;
/* B can be as large as 16, so this shift might overflow an int on a
@@ -1080,11 +1119,6 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
int recombine=0;
int longBlocks;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int k;
int encode;
int tf_change;
@@ -1099,7 +1133,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
/* Special case for one sample */
if (N==1)
{
return quant_band_n1(ctx, X, NULL, b, lowband_out);
return quant_band_n1(ctx, X, NULL, lowband_out);
}
if (tf_change>0)
@@ -1151,11 +1185,10 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
}
cm = quant_partition(ctx, X, N, b, B, lowband,
LM, gain, fill);
cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
/* Undo the sample reorganization going from time order to frequency order */
if (B0>1)
@@ -1208,11 +1241,6 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
int inv = 0;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int mbits, sbits, delta;
int itheta;
int qalloc;
@@ -1227,13 +1255,12 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
/* Special case for one sample */
if (N==1)
{
return quant_band_n1(ctx, X, Y, b, lowband_out);
return quant_band_n1(ctx, X, Y, lowband_out);
}
orig_fill = fill;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
LM, 1, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
inv = sctx.inv;
imid = sctx.imid;
iside = sctx.iside;
@@ -1281,13 +1308,13 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
sign = 1-2*sign;
/* We use orig_fill here because we want to fold the side, but if
itheta==16384, we'll have cleared the low bits of fill. */
cm = quant_band(ctx, x2, N, mbits, B, lowband,
LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, orig_fill);
/* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
and there's no need to worry about mixing with the other channel. */
y2[0] = -sign*x2[1];
y2[1] = sign*x2[0];
if (resynth)
if (ctx->resynth)
{
celt_norm tmp;
X[0] = MULT16_16_Q15(mid, X[0]);
@@ -1314,38 +1341,32 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
{
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm = quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm |= quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
} else {
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm = quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm |= quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
}
}
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
if (N!=2)
stereo_merge(X, Y, mid, N, ctx->arch);
@@ -1359,19 +1380,38 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
return cm;
}
static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
{
int n1, n2;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
n1 = M*(eBands[start+1]-eBands[start]);
n2 = M*(eBands[start+2]-eBands[start+1]);
/* Duplicate enough of the first band folding data to be able to fold the second band.
Copies no data for CELT-only mode. */
OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
if (dual_stereo)
OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
}
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
opus_uint32 *seed, int arch)
opus_uint32 *seed, int complexity, int arch, int disable_inv)
{
int i;
opus_int32 remaining_bits;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
VARDECL(celt_norm, _norm);
VARDECL(celt_norm, _lowband_scratch);
VARDECL(celt_norm, X_save);
VARDECL(celt_norm, Y_save);
VARDECL(celt_norm, X_save2);
VARDECL(celt_norm, Y_save2);
VARDECL(celt_norm, norm_save2);
int resynth_alloc;
celt_norm *lowband_scratch;
int B;
int M;
@@ -1379,10 +1419,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
int update_lowband = 1;
int C = Y_ != NULL ? 2 : 1;
int norm_offset;
int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !encode;
int resynth = !encode || theta_rdo;
#endif
struct band_ctx ctx;
SAVE_STACK;
@@ -1395,9 +1436,24 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
norm = _norm;
norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
/* We can use the last band as scratch space because we don't need that
scratch space for the last band. */
lowband_scratch = X_+M*eBands[m->nbEBands-1];
/* For decoding, we can use the last band as scratch space because we don't need that
scratch space for the last band and we don't care about the data there until we're
decoding the last band. */
if (encode && resynth)
resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
else
resynth_alloc = ALLOC_NONE;
ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
if (encode && resynth)
lowband_scratch = _lowband_scratch;
else
lowband_scratch = X_+M*eBands[m->nbEBands-1];
ALLOC(X_save, resynth_alloc, celt_norm);
ALLOC(Y_save, resynth_alloc, celt_norm);
ALLOC(X_save2, resynth_alloc, celt_norm);
ALLOC(Y_save2, resynth_alloc, celt_norm);
ALLOC(norm_save2, resynth_alloc, celt_norm);
lowband_offset = 0;
ctx.bandE = bandE;
@@ -1408,6 +1464,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.seed = *seed;
ctx.spread = spread;
ctx.arch = arch;
ctx.disable_inv = disable_inv;
ctx.resynth = resynth;
ctx.theta_round = 0;
/* Avoid injecting noise in the first band on transients. */
ctx.avoid_split_noise = B > 1;
for (i=start;i<end;i++)
{
opus_int32 tell;
@@ -1430,6 +1491,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
else
Y = NULL;
N = M*eBands[i+1]-M*eBands[i];
celt_assert(N > 0);
tell = ec_tell_frac(ec);
/* Compute how many bits we want to allocate to this band */
@@ -1445,8 +1507,15 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
b = 0;
}
#ifndef DISABLE_UPDATE_DRAFT
if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
lowband_offset = i;
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
#else
if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
lowband_offset = i;
#endif
tf_change = tf_res[i];
ctx.tf_change = tf_change;
@@ -1457,7 +1526,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
Y = norm;
lowband_scratch = NULL;
}
if (i==end-1)
if (last && !theta_rdo)
lowband_scratch = NULL;
/* Get a conservative estimate of the collapse_mask's for the bands we're
@@ -1472,7 +1541,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
fold_start = lowband_offset;
while(M*eBands[--fold_start] > effective_lowband+norm_offset);
fold_end = lowband_offset-1;
#ifndef DISABLE_UPDATE_DRAFT
while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
#else
while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
#endif
x_cm = y_cm = 0;
fold_i = fold_start; do {
x_cm |= collapse_masks[fold_i*C+0];
@@ -1505,13 +1578,79 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
} else {
if (Y!=NULL)
{
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
if (theta_rdo && i < intensity)
{
ec_ctx ec_save, ec_save2;
struct band_ctx ctx_save, ctx_save2;
opus_val32 dist0, dist1;
unsigned cm, cm2;
int nstart_bytes, nend_bytes, save_bytes;
unsigned char *bytes_buf;
unsigned char bytes_save[1275];
opus_val16 w[2];
compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
/* Make a copy. */
cm = x_cm|y_cm;
ec_save = *ec;
ctx_save = ctx;
OPUS_COPY(X_save, X, N);
OPUS_COPY(Y_save, Y, N);
/* Encode and round down. */
ctx.theta_round = -1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
/* Save first result. */
cm2 = x_cm;
ec_save2 = *ec;
ctx_save2 = ctx;
OPUS_COPY(X_save2, X, N);
OPUS_COPY(Y_save2, Y, N);
if (!last)
OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
nstart_bytes = ec_save.offs;
nend_bytes = ec_save.storage;
bytes_buf = ec_save.buf+nstart_bytes;
save_bytes = nend_bytes-nstart_bytes;
OPUS_COPY(bytes_save, bytes_buf, save_bytes);
/* Restore */
*ec = ec_save;
ctx = ctx_save;
OPUS_COPY(X, X_save, N);
OPUS_COPY(Y, Y_save, N);
#ifndef DISABLE_UPDATE_DRAFT
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
#endif
/* Encode and round up. */
ctx.theta_round = 1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
if (dist0 >= dist1) {
x_cm = cm2;
*ec = ec_save2;
ctx = ctx_save2;
OPUS_COPY(X, X_save2, N);
OPUS_COPY(Y, Y_save2, N);
if (!last)
OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
OPUS_COPY(bytes_buf, bytes_save, save_bytes);
}
} else {
ctx.theta_round = 0;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
}
} else {
x_cm = quant_band(&ctx, X, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
}
y_cm = x_cm;
}
@@ -1521,6 +1660,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
/* Update the folding position only as long as we have 1 bit/sample depth. */
update_lowband = b>(N<<BITRES);
/* We only need to avoid noise on a split for the first band. After that, we
have folding. */
ctx.avoid_split_noise = 0;
}
*seed = ctx.seed;
+6 -3
View File
@@ -36,12 +36,15 @@
#include "entdec.h"
#include "rate.h"
opus_int16 bitexact_cos(opus_int16 x);
int bitexact_log2tan(int isin,int icos);
/** Compute the amplitude (sqrt energy) in each of the bands
* @param m Mode data
* @param X Spectrum
* @param bandE Square root of the energy for each band (returned)
*/
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
@@ -69,7 +72,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M);
int end, int C, int M, const int *spread_weight);
#ifdef MEASURE_NORM_MSE
void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
@@ -105,7 +108,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
int arch);
int complexity, int arch, int disable_inv);
void anti_collapse(const CELTMode *m, celt_norm *X_,
unsigned char *collapse_masks, int LM, int C, int size, int start,
+21 -4
View File
@@ -111,26 +111,31 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
t = SATURATE(t, SIG_SAT);
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
t = SATURATE(t, SIG_SAT);
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
t = SATURATE(t, SIG_SAT);
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
t = SATURATE(t, SIG_SAT);
y[i+4] = t;
}
#ifdef CUSTOM_MODES
@@ -141,6 +146,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=x3;
x3=x2;
@@ -169,6 +175,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+ MULT16_32_Q15(g10,x2)
+ MULT16_32_Q15(g11,ADD32(x1,x3))
+ MULT16_32_Q15(g12,ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@@ -200,6 +207,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
OPUS_MOVE(y, x, N);
return;
}
/* When the gain is zero, T0 and/or T1 is set to zero. We need
to have then be at least 2 to avoid processing garbage data. */
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
@@ -225,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@@ -244,11 +256,16 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
#endif /* OVERRIDE_comb_filter */
/* TF change table. Positive values mean better frequency resolution (longer
effective window), whereas negative values mean better time resolution
(shorter effective window). The second index is computed as:
4*isTransient + 2*tf_select + per_band_flag */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
{0, -1, 0, -2, 1, 0, 1,-1},
{0, -2, 0, -3, 2, 0, 1,-1},
{0, -2, 0, -3, 3, 0, 1,-1},
/*isTransient=0 isTransient=1 */
{0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */
{0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */
{0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */
{0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */
};
+27 -5
View File
@@ -50,6 +50,8 @@ extern "C" {
#define CELTDecoder OpusCustomDecoder
#define CELTMode OpusCustomMode
#define LEAK_BANDS 19
typedef struct {
int valid;
float tonality;
@@ -57,17 +59,27 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
int bandwidth;
}AnalysisInfo;
float music_prob_min;
float music_prob_max;
int bandwidth;
float activity_probability;
float max_pitch_ratio;
/* Store as Q6 char to save space. */
unsigned char leak_boost[LEAK_BANDS];
} AnalysisInfo;
typedef struct {
int signalType;
int offset;
} SILKInfo;
#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
/* Encoder/decoder Requests */
#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
/* Expose this option again when variable framesize actually works */
#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
/* Encoder/decoder Requests */
#define CELT_SET_PREDICTION_REQUEST 10002
@@ -116,6 +128,9 @@ typedef struct {
#define OPUS_SET_ENERGY_MASK_REQUEST 10026
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
#define CELT_SET_SILK_INFO_REQUEST 10028
#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
/* Encoder stuff */
int celt_encoder_get_size(int channels);
@@ -194,6 +209,13 @@ static OPUS_INLINE int fromOpus(unsigned char c)
extern const signed char tf_select_table[4][8];
#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
void validate_celt_decoder(CELTDecoder *st);
#define VALIDATE_CELT_DECODER(st) validate_celt_decoder(st)
#else
#define VALIDATE_CELT_DECODER(st)
#endif
int resampling_factor(opus_int32 rate);
void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+163 -33
View File
@@ -51,6 +51,14 @@
#include "celt_lpc.h"
#include "vq.h"
/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
current value corresponds to a pitch of 66.67 Hz. */
#define PLC_PITCH_LAG_MAX (720)
/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
#define NORM_ALIASING_HACK
#endif
@@ -73,6 +81,7 @@ struct OpusCustomDecoder {
int downsample;
int start, end;
int signalling;
int disable_inv;
int arch;
/* Everything beyond this point gets cleared on a reset */
@@ -100,6 +109,44 @@ struct OpusCustomDecoder {
/* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
};
#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
/* Make basic checks on the CELT state to ensure we don't end
up writing all over memory. */
void validate_celt_decoder(CELTDecoder *st)
{
#ifndef CUSTOM_MODES
celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL));
celt_assert(st->overlap == 120);
celt_assert(st->end <= 21);
#else
/* From Section 4.3 in the spec: "The normal CELT layer uses 21 of those bands,
though Opus Custom (see Section 6.2) may use a different number of bands"
Check if it's within the maximum number of Bark frequency bands instead */
celt_assert(st->end <= 25);
#endif
celt_assert(st->channels == 1 || st->channels == 2);
celt_assert(st->stream_channels == 1 || st->stream_channels == 2);
celt_assert(st->downsample > 0);
celt_assert(st->start == 0 || st->start == 17);
celt_assert(st->start < st->end);
#ifdef OPUS_ARCHMASK
celt_assert(st->arch >= 0);
celt_assert(st->arch <= OPUS_ARCHMASK);
#endif
celt_assert(st->last_pitch_index <= PLC_PITCH_LAG_MAX);
celt_assert(st->last_pitch_index >= PLC_PITCH_LAG_MIN || st->last_pitch_index == 0);
celt_assert(st->postfilter_period < MAX_PERIOD);
celt_assert(st->postfilter_period >= COMBFILTER_MINPERIOD || st->postfilter_period == 0);
celt_assert(st->postfilter_period_old < MAX_PERIOD);
celt_assert(st->postfilter_period_old >= COMBFILTER_MINPERIOD || st->postfilter_period_old == 0);
celt_assert(st->postfilter_tapset <= 2);
celt_assert(st->postfilter_tapset >= 0);
celt_assert(st->postfilter_tapset_old <= 2);
celt_assert(st->postfilter_tapset_old >= 0);
}
#endif
int celt_decoder_get_size(int channels)
{
const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
@@ -163,6 +210,11 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
#ifndef DISABLE_UPDATE_DRAFT
st->disable_inv = channels == 1;
#else
st->disable_inv = 0;
#endif
st->arch = opus_select_arch();
opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
@@ -177,6 +229,36 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
}
#endif /* CUSTOM_MODES */
#ifndef CUSTOM_MODES
/* Special case for stereo with no downsampling and no accumulation. This is
quite common and we can make it faster by processing both channels in the
same loop, reducing overhead due to the dependency loop in the IIR filter. */
static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
celt_sig *mem)
{
celt_sig * OPUS_RESTRICT x0;
celt_sig * OPUS_RESTRICT x1;
celt_sig m0, m1;
int j;
x0=in[0];
x1=in[1];
m0 = mem[0];
m1 = mem[1];
for (j=0;j<N;j++)
{
celt_sig tmp0, tmp1;
/* Add VERY_SMALL to x[] first to reduce dependency chain. */
tmp0 = x0[j] + VERY_SMALL + m0;
tmp1 = x1[j] + VERY_SMALL + m1;
m0 = MULT16_32_Q15(coef0, tmp0);
m1 = MULT16_32_Q15(coef0, tmp1);
pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
}
mem[0] = m0;
mem[1] = m1;
}
#endif
#ifndef RESYNTH
static
@@ -190,6 +272,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
opus_val16 coef0;
VARDECL(celt_sig, scratch);
SAVE_STACK;
#ifndef CUSTOM_MODES
/* Short version for common case. */
if (downsample == 1 && C == 2 && !accum)
{
deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
return;
}
#endif
#ifndef FIXED_POINT
(void)accum;
celt_assert(accum==0);
@@ -225,7 +315,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
scratch[j] = tmp;
}
@@ -246,7 +336,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
{
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
}
@@ -333,7 +423,7 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
downsample, silence);
for (i=0;i<N;i++)
freq[i] = HALF32(ADD32(freq[i],freq2[i]));
freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
} else {
@@ -345,6 +435,12 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
} while (++c<CC);
}
/* Saturate IMDCT output so that we can't overflow in the pitch postfilter
or in the */
c=0; do {
for (i=0;i<N;i++)
out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
} while (++c<CC);
RESTORE_STACK;
}
@@ -387,14 +483,6 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
}
}
/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
current value corresponds to a pitch of 66.67 Hz. */
#define PLC_PITCH_LAG_MAX (720)
/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
{
int pitch_index;
@@ -504,12 +592,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
} else {
int exc_length;
/* Pitch-based PLC */
const opus_val16 *window;
opus_val16 *exc;
opus_val16 fade = Q15ONE;
int pitch_index;
VARDECL(opus_val32, etmp);
VARDECL(opus_val16, exc);
VARDECL(opus_val16, _exc);
VARDECL(opus_val16, fir_tmp);
if (loss_count == 0)
{
@@ -519,8 +610,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
fade = QCONST16(.8f,15);
}
/* We want the excitation for 2 pitch periods in order to look for a
decaying signal, but we can't get more than MAX_PERIOD. */
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
ALLOC(etmp, overlap, opus_val32);
ALLOC(exc, MAX_PERIOD, opus_val16);
ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
ALLOC(fir_tmp, exc_length, opus_val16);
exc = _exc+LPC_ORDER;
window = mode->window;
c=0; do {
opus_val16 decay;
@@ -529,13 +626,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
celt_sig *buf;
int extrapolation_offset;
int extrapolation_len;
int exc_length;
int j;
buf = decode_mem[c];
for (i=0;i<MAX_PERIOD;i++) {
exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
}
for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
exc[i-LPC_ORDER] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
if (loss_count == 0)
{
@@ -561,22 +656,32 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
#endif
}
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
#ifdef FIXED_POINT
/* For fixed-point, apply bandwidth expansion until we can guarantee that
no overflow can happen in the IIR filter. This means:
32768*sum(abs(filter)) < 2^31 */
while (1) {
opus_val16 tmp=Q15ONE;
opus_val32 sum=QCONST16(1., SIG_SHIFT);
for (i=0;i<LPC_ORDER;i++)
sum += ABS16(lpc[c*LPC_ORDER+i]);
if (sum < 65535) break;
for (i=0;i<LPC_ORDER;i++)
{
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
}
}
#endif
}
/* We want the excitation for 2 pitch periods in order to look for a
decaying signal, but we can't get more than MAX_PERIOD. */
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
/* Initialize the LPC history with the samples just before the start
of the region for which we're computing the excitation. */
{
opus_val16 lpc_mem[LPC_ORDER];
for (i=0;i<LPC_ORDER;i++)
{
lpc_mem[i] =
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
}
/* Compute the excitation for exc_length samples before the loss. */
/* Compute the excitation for exc_length samples before the loss. We need the copy
because celt_fir() cannot filter in-place. */
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
fir_tmp, exc_length, LPC_ORDER, st->arch);
OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
}
/* Check if the waveform is decaying, and if so how fast.
@@ -630,9 +735,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
tmp = ROUND16(
buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
SIG_SHIFT);
S1 += SHR32(MULT16_16(tmp, tmp), 8);
S1 += SHR32(MULT16_16(tmp, tmp), 10);
}
{
opus_val16 lpc_mem[LPC_ORDER];
/* Copy the last decoded samples (prior to the overlap region) to
@@ -644,6 +748,10 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
lpc_mem, st->arch);
#ifdef FIXED_POINT
for (i=0; i < extrapolation_len; i++)
buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
#endif
}
/* Check if the synthesis energy is higher than expected, which can
@@ -654,7 +762,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
for (i=0;i<extrapolation_len;i++)
{
opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
S2 += SHR32(MULT16_16(tmp, tmp), 8);
S2 += SHR32(MULT16_16(tmp, tmp), 10);
}
/* This checks for an "explosion" in the synthesis. */
#ifdef FIXED_POINT
@@ -762,6 +870,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
const opus_int16 *eBands;
ALLOC_STACK;
VALIDATE_CELT_DECODER(st);
mode = st->mode;
nbEBands = mode->nbEBands;
overlap = mode->overlap;
@@ -956,7 +1065,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
ALLOC(pulses, nbEBands, int);
ALLOC(fine_priority, nbEBands, int);
codedBands = compute_allocation(mode, start, end, offsets, cap,
codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
@@ -979,7 +1088,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch);
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
st->arch, st->disable_inv);
if (anti_collapse_rsv > 0)
{
@@ -1234,6 +1344,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
*value=st->rng;
}
break;
case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
if(value<0 || value>1)
{
goto bad_arg;
}
st->disable_inv = value;
}
break;
case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->disable_inv;
}
break;
default:
goto bad_request;
}
+298 -101
View File
@@ -73,8 +73,8 @@ struct OpusCustomEncoder {
int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */
int loss_rate;
int lsb_depth;
int variable_duration;
int lfe;
int disable_inv;
int arch;
/* Everything beyond this point gets cleared on a reset */
@@ -98,6 +98,7 @@ struct OpusCustomEncoder {
#endif
int consec_transient;
AnalysisInfo analysis;
SILKInfo silk_info;
opus_val32 preemph_memE[2];
opus_val32 preemph_memD[2];
@@ -123,6 +124,7 @@ struct OpusCustomEncoder {
/* opus_val16 oldBandE[], Size = channels*mode->nbEBands */
/* opus_val16 oldLogE[], Size = channels*mode->nbEBands */
/* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */
/* opus_val16 energyError[], Size = channels*mode->nbEBands */
};
int celt_encoder_get_size(int channels)
@@ -136,9 +138,10 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int
int size = sizeof(struct CELTEncoder)
+ (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */
+ channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+ 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
+ 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
/* opus_val16 oldLogE[channels*mode->nbEBands]; */
/* opus_val16 oldLogE2[channels*mode->nbEBands]; */
/* opus_val16 energyError[channels*mode->nbEBands]; */
return size;
}
@@ -178,7 +181,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
st->arch = arch;
st->constrained_vbr = 1;
@@ -223,7 +225,8 @@ void opus_custom_encoder_destroy(CELTEncoder *st)
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
opus_val16 *tf_estimate, int *tf_chan)
opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients,
int *weak_transient)
{
int i;
VARDECL(opus_val16, tmp);
@@ -233,6 +236,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
int c;
opus_val16 tf_max;
int len2;
/* Forward masking: 6.7 dB/ms. */
#ifdef FIXED_POINT
int forward_shift = 4;
#else
opus_val16 forward_decay = QCONST16(.0625f,15);
#endif
/* Table of 6*64/x, trained on real data to minimize the average error */
static const unsigned char inv_table[128] = {
255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
@@ -247,6 +256,19 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
SAVE_STACK;
ALLOC(tmp, len, opus_val16);
*weak_transient = 0;
/* For lower bitrates, let's be more conservative and have a forward masking
decay of 3.3 dB/ms. This avoids having to code transients at very low
bitrate (mostly for hybrid), which can result in unstable energy and/or
partial collapse. */
if (allow_weak_transients)
{
#ifdef FIXED_POINT
forward_shift = 5;
#else
forward_decay = QCONST16(.03125f,15);
#endif
}
len2=len/2;
for (c=0;c<C;c++)
{
@@ -269,7 +291,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mem0 = mem1 + y - 2*x;
mem1 = x - .5f*y;
#endif
tmp[i] = EXTRACT16(SHR32(y,2));
tmp[i] = SROUND16(y, 2);
/*printf("%f ", tmp[i]);*/
}
/*printf("\n");*/
@@ -280,7 +302,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Normalize tmp to max range */
{
int shift=0;
shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len)));
if (shift!=0)
{
for (i=0;i<len;i++)
@@ -299,9 +321,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mean += x2;
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(x2-mem0,4);
tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
#else
tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
#endif
mem0 = tmp[i];
}
@@ -311,6 +333,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Backward pass to compute the pre-echo threshold */
for (i=len2-1;i>=0;i--)
{
/* Backward masking: 13.9 dB/ms. */
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
@@ -339,6 +362,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Compute harmonic mean discarding the unreliable boundaries
The data is smooth, so we only take 1/4th of the samples */
unmask=0;
/* We should never see NaNs here. If we find any, then something really bad happened and we better abort
before it does any damage later on. If these asserts are disabled (no hardening), then the table
lookup a few lines below (id = ...) is likely to crash dur to an out-of-bounds read. DO NOT FIX
that crash on NaN since it could result in a worse issue later on. */
celt_assert(!celt_isnan(tmp[0]));
celt_assert(!celt_isnan(norm));
for (i=12;i<len2-5;i+=4)
{
int id;
@@ -359,7 +388,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
}
}
is_transient = mask_metric>200;
/* For low bitrates, define "weak transients" that need to be
handled differently to avoid partial collapse. */
if (allow_weak_transients && is_transient && mask_metric<600) {
is_transient = 0;
*weak_transient = 1;
}
/* Arbitrary metric for VBR boost */
tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
/* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
@@ -549,7 +583,7 @@ static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias
static int tf_analysis(const CELTMode *m, int len, int isTransient,
int *tf_res, int lambda, celt_norm *X, int N0, int LM,
int *tf_sum, opus_val16 tf_estimate, int tf_chan)
opus_val16 tf_estimate, int tf_chan, int *importance)
{
int i;
VARDECL(int, metric);
@@ -574,7 +608,6 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
ALLOC(path0, len, int);
ALLOC(path1, len, int);
*tf_sum = 0;
for (i=0;i<len;i++)
{
int k, N;
@@ -629,27 +662,26 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
metric[i] = 2*best_level;
else
metric[i] = -2*best_level;
*tf_sum += (isTransient ? LM : 0) - metric[i]/2;
/* For bands that can't be split to -1, set the metric to the half-way point to avoid
biasing the decision */
if (narrow && (metric[i]==0 || metric[i]==-2*LM))
metric[i]-=1;
/*printf("%d ", metric[i]);*/
/*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/
}
/*printf("\n");*/
/* Search for the optimal tf resolution, including tf_select */
tf_select = 0;
for (sel=0;sel<2;sel++)
{
cost0 = 0;
cost1 = isTransient ? 0 : lambda;
cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda);
for (i=1;i<len;i++)
{
int curr0, curr1;
curr0 = IMIN(cost0, cost1 + lambda);
curr1 = IMIN(cost0 + lambda, cost1);
cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
}
cost0 = IMIN(cost0, cost1);
selcost[sel]=cost0;
@@ -658,8 +690,8 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
* If tests confirm it's useful for non-transients, we could allow it. */
if (selcost[1]<selcost[0] && isTransient)
tf_select=1;
cost0 = 0;
cost1 = isTransient ? 0 : lambda;
cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda);
/* Viterbi forward pass */
for (i=1;i<len;i++)
{
@@ -687,8 +719,8 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
curr1 = from1;
path1[i]= 1;
}
cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
}
tf_res[len-1] = cost0 < cost1 ? 0 : 1;
/* Viterbi backward pass to check the decisions */
@@ -754,7 +786,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
int intensity, opus_val16 surround_trim, int arch)
int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch)
{
int i;
opus_val32 diff=0;
@@ -762,6 +794,14 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
int trim_index;
opus_val16 trim = QCONST16(5.f, 8);
opus_val16 logXC, logXC2;
/* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less
clear what's best, so we're keeping it as it was before, at least for now. */
if (equiv_rate < 64000) {
trim = QCONST16(4.f, 8);
} else if (equiv_rate < 80000) {
opus_int32 frac = (equiv_rate-64000) >> 10;
trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac;
}
if (C==2)
{
opus_val16 sum = 0; /* Q10 */
@@ -809,7 +849,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
} while (++c<C);
diff /= C*(end-1);
/*printf("%f\n", diff);*/
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= SHR16(surround_trim, DB_SHIFT-8);
trim -= 2*SHR16(tf_estimate, 14-8);
#ifndef DISABLE_FLOAT_API
@@ -930,7 +970,8 @@ static opus_val16 median_of_3(const opus_val16 *x)
static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc,
AnalysisInfo *analysis, int *importance, int *spread_weight)
{
int i, c;
opus_int32 tot_boost=0;
@@ -956,6 +997,42 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
for (i=0;i<end;i++)
maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
} while (++c<C);
{
/* Compute a really simple masking model to avoid taking into account completely masked
bands when computing the spreading decision. */
VARDECL(opus_val16, mask);
VARDECL(opus_val16, sig);
ALLOC(mask, nbEBands, opus_val16);
ALLOC(sig, nbEBands, opus_val16);
for (i=0;i<end;i++)
mask[i] = bandLogE[i]-noise_floor[i];
if (C==2)
{
for (i=0;i<end;i++)
mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]);
}
OPUS_COPY(sig, mask, end);
for (i=1;i<end;i++)
mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT));
for (i=end-2;i>=0;i--)
mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT));
for (i=0;i<end;i++)
{
/* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/
opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]);
/* Clamp SMR to make sure we're not shifting by something negative or too large. */
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT);
#else
int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr)));
#endif
spread_weight[i] = 32 >> shift;
}
/*for (i=0;i<end;i++)
printf("%d ", spread_weight[i]);
printf("\n");*/
}
/* Make sure that dynamic allocation can't make us bust the budget */
if (effectiveBytes > 50 && LM>=1 && !lfe)
{
@@ -1012,6 +1089,14 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
}
for (i=start;i<end;i++)
follower[i] = MAX16(follower[i], surround_dynalloc[i]);
for (i=start;i<end;i++)
{
#ifdef FIXED_POINT
importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16);
#else
importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))));
#endif
}
/* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
if ((!vbr || constrained_vbr)&&!isTransient)
{
@@ -1020,14 +1105,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
}
for (i=start;i<end;i++)
{
int width;
int boost;
int boost_bits;
if (i<8)
follower[i] *= 2;
if (i>=12)
follower[i] = HALF16(follower[i]);
}
#ifdef DISABLE_FLOAT_API
(void)analysis;
#else
if (analysis->valid)
{
for (i=start;i<IMIN(LEAK_BANDS, end);i++)
follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i];
}
#endif
for (i=start;i<end;i++)
{
int width;
int boost;
int boost_bits;
follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
width = C*(eBands[i+1]-eBands[i])<<LM;
@@ -1042,11 +1139,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
boost_bits = boost*6<<BITRES;
}
/* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
/* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */
if ((!vbr || (constrained_vbr&&!isTransient))
&& (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
&& (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3)
{
opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3);
opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3);
offsets[i] = cap-tot_boost;
tot_boost = cap;
break;
@@ -1055,6 +1152,9 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
tot_boost += boost_bits;
}
}
} else {
for (i=start;i<end;i++)
importance[i] = 13;
}
*tot_boost_ = tot_boost;
RESTORE_STACK;
@@ -1063,7 +1163,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
{
int c;
VARDECL(celt_sig, _pre);
@@ -1119,7 +1219,12 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
gain1 = 0;
pitch_index = COMBFILTER_MINPERIOD;
}
#ifndef DISABLE_FLOAT_API
if (analysis->valid)
gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio);
#else
(void)analysis;
#endif
/* Gain threshold for enabling the prefilter/postfilter */
pf_threshold = QCONST16(.2f,15);
@@ -1193,7 +1298,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking,
int lfe, int has_surround_mask, opus_val16 surround_masking,
opus_val16 temporal_vbr)
{
/* The target rate in 8th bits per frame */
@@ -1235,10 +1340,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
}
/* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
target += tot_boost-(16<<LM);
target += tot_boost-(19<<LM);
/* Apply transient boost, compensating for average boost. */
tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
QCONST16(0.02f,14) : QCONST16(0.04f,14);
tf_calibration = QCONST16(0.044f,14);
target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
#ifndef DISABLE_FLOAT_API
@@ -1249,7 +1353,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
float tonal;
/* Tonality boost (compensating for the average). */
tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
if (pitch_change)
tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f);
@@ -1279,21 +1383,11 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
/*printf("%f %d\n", maxDepth, floor_depth);*/
}
if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
/* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
for long. Needs tuning. */
if ((!has_surround_mask||lfe) && constrained_vbr)
{
opus_val16 rate_factor = Q15ONE;
if (bitrate < 64000)
{
#ifdef FIXED_POINT
rate_factor = MAX16(0,(bitrate-32000));
#else
rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
#endif
}
if (constrained_vbr)
rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target);
}
if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
@@ -1327,11 +1421,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
VARDECL(int, pulses);
VARDECL(int, cap);
VARDECL(int, offsets);
VARDECL(int, importance);
VARDECL(int, spread_weight);
VARDECL(int, fine_priority);
VARDECL(int, tf_res);
VARDECL(unsigned char, collapse_masks);
celt_sig *prefilter_mem;
opus_val16 *oldBandE, *oldLogE, *oldLogE2;
opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError;
int shortBlocks=0;
int isTransient=0;
const int CC = st->channels;
@@ -1343,7 +1439,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int end;
int effEnd;
int codedBands;
int tf_sum;
int alloc_trim;
int pitch_index=COMBFILTER_MINPERIOD;
opus_val16 gain1 = 0;
@@ -1355,6 +1450,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_int32 total_boost;
opus_int32 balance;
opus_int32 tell;
opus_int32 tell0_frac;
int prefilter_tapset=0;
int pf_on;
int anti_collapse_rsv;
@@ -1376,7 +1472,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_val16 surround_masking=0;
opus_val16 temporal_vbr=0;
opus_val16 surround_trim = 0;
opus_int32 equiv_rate = 510000;
opus_int32 equiv_rate;
int hybrid;
int weak_transient = 0;
int enable_tf_analysis;
VARDECL(opus_val16, surround_dynalloc);
ALLOC_STACK;
@@ -1386,6 +1485,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
eBands = mode->eBands;
start = st->start;
end = st->end;
hybrid = start != 0;
tf_estimate = 0;
if (nbCompressedBytes<2 || pcm==NULL)
{
@@ -1409,12 +1509,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
oldLogE = oldBandE + CC*nbEBands;
oldLogE2 = oldLogE + CC*nbEBands;
energyError = oldLogE2 + CC*nbEBands;
if (enc==NULL)
{
tell=1;
tell0_frac=tell=1;
nbFilledBytes=0;
} else {
tell0_frac=ec_tell_frac(enc);
tell=ec_tell(enc);
nbFilledBytes=(tell+4)>>3;
}
@@ -1467,10 +1569,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (st->bitrate!=OPUS_BITRATE_MAX)
nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
(tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
effectiveBytes = nbCompressedBytes;
effectiveBytes = nbCompressedBytes - nbFilledBytes;
}
equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50);
if (st->bitrate != OPUS_BITRATE_MAX)
equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50));
if (enc==NULL)
{
@@ -1558,17 +1661,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
int enabled;
int qg;
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf
&& st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf
&& st->complexity >= 5;
prefilter_tapset = st->tapset_decision;
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
pitch_change = 1;
if (pf_on==0)
{
if(start==0 && tell+16<=total_bits)
if(!hybrid && tell+16<=total_bits)
ec_enc_bit_logp(enc, 0, 1);
} else {
/*This block is not gated by a total bits check only because
@@ -1589,8 +1692,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
shortBlocks = 0;
if (st->complexity >= 1 && !st->lfe)
{
/* Reduces the likelihood of energy instability on fricatives at low bitrate
in hybrid mode. It seems like we still want to have real transients on vowels
though (small SILK quantization offset value). */
int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2;
isTransient = transient_analysis(in, N+overlap, CC,
&tf_estimate, &tf_chan);
&tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
}
if (LM>0 && ec_tell(enc)+3<=total_bits)
{
@@ -1610,16 +1717,19 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (secondMdct)
{
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
}
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
/* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered
at the Opus layer), just abort. */
celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N])));
if (CC==2&&C==1)
tf_chan = 0;
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
if (st->lfe)
{
@@ -1634,7 +1744,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
OPUS_CLEAR(surround_dynalloc, end);
/* This computes how much masking takes place between surround channels */
if (start==0&&st->energy_mask&&!st->lfe)
if (!hybrid&&st->energy_mask&&!st->lfe)
{
int mask_end;
int midband;
@@ -1736,14 +1846,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Last chance to catch any transient we might have missed in the
time-domain analysis */
if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid)
{
if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
{
isTransient = 1;
shortBlocks = M;
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
/* Compensate for the scaling of short vs long mdcts */
for (i=0;i<C*nbEBands;i++)
@@ -1760,31 +1870,59 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Band normalisation */
normalise_bands(mode, freq, X, bandE, effEnd, C, M);
enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe;
ALLOC(offsets, nbEBands, int);
ALLOC(importance, nbEBands, int);
ALLOC(spread_weight, nbEBands, int);
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight);
ALLOC(tf_res, nbEBands, int);
/* Disable variable tf resolution for hybrid and at very low bitrate */
if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe)
if (enable_tf_analysis)
{
int lambda;
if (effectiveBytes<40)
lambda = 12;
else if (effectiveBytes<60)
lambda = 6;
else if (effectiveBytes<100)
lambda = 4;
else
lambda = 3;
lambda*=2;
tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
lambda = IMAX(80, 20480/effectiveBytes + 2);
tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance);
for (i=effEnd;i<end;i++)
tf_res[i] = tf_res[effEnd-1];
} else if (hybrid && weak_transient)
{
/* For weak transients, we rely on the fact that improving time resolution using
TF on a long window is imperfect and will not result in an energy collapse at
low bitrate. */
for (i=0;i<end;i++)
tf_res[i] = 1;
tf_select=0;
} else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2)
{
/* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */
for (i=0;i<end;i++)
tf_res[i] = 0;
tf_select=isTransient;
} else {
tf_sum = 0;
for (i=0;i<end;i++)
tf_res[i] = isTransient;
tf_select=0;
}
ALLOC(error, C*nbEBands, opus_val16);
c=0;
do {
for (i=start;i<end;i++)
{
/* When the energy is stable, slightly bias energy quantization towards
the previous error to make the gain more stable (a constant offset is
better than fluctuations). */
if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT))
{
bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15));
}
}
} while (++c < C);
quant_coarse_energy(mode, start, end, effEnd, bandLogE,
oldBandE, total_bits, error, enc,
C, LM, nbAvailableBytes, st->force_intra,
@@ -1798,7 +1936,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
st->tapset_decision = 0;
st->spread_decision = SPREAD_NORMAL;
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0)
} else if (hybrid)
{
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
else if (isTransient)
st->spread_decision = SPREAD_NORMAL;
else
st->spread_decision = SPREAD_AGGRESSIVE;
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
{
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
@@ -1822,7 +1968,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
st->spread_decision = spreading_decision(mode, X,
&st->tonal_average, st->spread_decision, &st->hf_average,
&st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
&st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight);
}
/*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
/*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
@@ -1830,11 +1976,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
}
ALLOC(offsets, nbEBands, int);
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
/* For LFE, everything interesting is in the first band */
if (st->lfe)
offsets[0] = IMIN(8, effectiveBytes/3);
@@ -1896,12 +2037,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
alloc_trim = 5;
if (tell+(6<<BITRES) <= total_bits - total_boost)
{
if (st->lfe)
if (start > 0 || st->lfe)
{
st->stereo_saving = 0;
alloc_trim = 5;
else
} else {
alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
st->intensity, surround_trim, st->arch);
st->intensity, surround_trim, equiv_rate, st->arch);
}
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
tell = ec_tell_frac(enc);
}
@@ -1919,17 +2063,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
The CELT allocator will just not be able to use more than that anyway. */
nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
base_target = vbr_rate - ((40*C+20)<<BITRES);
if (!hybrid)
{
base_target = vbr_rate - ((40*C+20)<<BITRES);
} else {
base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES));
}
if (st->constrained_vbr)
base_target += (st->vbr_offset>>lm_diff);
target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
if (!hybrid)
{
target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
st->lastCodedBands, C, st->intensity, st->constrained_vbr,
st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
st->lfe, st->energy_mask!=NULL, surround_masking,
temporal_vbr);
} else {
target = base_target;
/* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */
if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM);
if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM);
/* Boosting bitrate on transients and vowels with significant temporal
spikes. */
target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES));
/* If we have a strong transient, let's make sure it has enough bits to code
the first two bands, so that it can use folding rather than noise. */
if (tf_estimate > QCONST16(.7f,14))
target = IMAX(target, 50<<BITRES);
}
/* The current offset is removed from the target and the space used
so far is added*/
target=target+tell;
@@ -1937,11 +2100,16 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
result in the encoder running out of bits.
The margin of 2 bytes ensures that none of the bust-prevention logic
in the decoder will have triggered so far. */
min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2;
/* Take into account the 37 bits we need to have left in the packet to
signal a redundant frame in hybrid mode. Creating a shorter packet would
create an entropy coder desync. */
if (hybrid)
min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3));
nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
/* By how much did we "miss" the target on that frame */
delta = target - vbr_rate;
@@ -1988,7 +2156,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->vbr_reservoir = 0;
/*printf ("+%d\n", adjust);*/
}
nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
/*printf("%d\n", nbCompressedBytes*50*8);*/
/* This moves the raw bits to take into account the new compressed size */
ec_enc_shrink(enc, nbCompressedBytes);
@@ -2023,7 +2191,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
#endif
if (st->lfe)
signalBandwidth = 1;
codedBands = compute_allocation(mode, start, end, offsets, cap,
codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
if (st->lastCodedBands)
@@ -2038,7 +2206,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
bandE, pulses, shortBlocks, st->spread_decision,
dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
balance, enc, LM, codedBands, &st->rng, st->arch);
balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv);
if (anti_collapse_rsv > 0)
{
@@ -2049,6 +2217,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ec_enc_bits(enc, anti_collapse_on, 1);
}
quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
OPUS_CLEAR(energyError, nbEBands*CC);
c=0;
do {
for (i=start;i<end;i++)
{
energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands]));
}
} while (++c < C);
if (silence)
{
@@ -2321,10 +2497,24 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
*value=st->lsb_depth;
}
break;
case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
st->variable_duration = value;
if(value<0 || value>1)
{
goto bad_arg;
}
st->disable_inv = value;
}
break;
case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->disable_inv;
}
break;
case OPUS_RESET_STATE:
@@ -2368,6 +2558,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
OPUS_COPY(&st->analysis, info, 1);
}
break;
case CELT_SET_SILK_INFO_REQUEST:
{
SILKInfo *info = va_arg(ap, SILKInfo *);
if (info)
OPUS_COPY(&st->silk_info, info, 1);
}
break;
case CELT_GET_MODE_REQUEST:
{
const CELTMode ** value = va_arg(ap, const CELTMode**);
+77 -47
View File
@@ -50,17 +50,21 @@ int p
#endif
OPUS_CLEAR(lpc, p);
#ifdef FIXED_POINT
if (ac[0] != 0)
#else
if (ac[0] > 1e-10f)
#endif
{
for (i = 0; i < p; i++) {
/* Sum up this iteration's reflection coefficient */
opus_val32 rr = 0;
for (j = 0; j < i; j++)
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
rr += SHR32(ac[i + 1],3);
r = -frac_div32(SHL32(rr,3), error);
rr += SHR32(ac[i + 1],6);
r = -frac_div32(SHL32(rr,6), error);
/* Update LPC coefficients and total error */
lpc[i] = SHR32(r,3);
lpc[i] = SHR32(r,6);
for (j = 0; j < (i+1)>>1; j++)
{
opus_val32 tmp1, tmp2;
@@ -73,74 +77,100 @@ int p
error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
/* Bail out once we get 30 dB gain */
#ifdef FIXED_POINT
if (error<SHR32(ac[0],10))
if (error<=SHR32(ac[0],10))
break;
#else
if (error<.001f*ac[0])
if (error<=.001f*ac[0])
break;
#endif
}
}
#ifdef FIXED_POINT
for (i=0;i<p;i++)
_lpc[i] = ROUND16(lpc[i],16);
{
/* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
fixes should also be applied there. */
int iter, idx = 0;
opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
for (iter = 0; iter < 10; iter++) {
maxabs = 0;
for (i = 0; i < p; i++) {
absval = ABS32(lpc[i]);
if (absval > maxabs) {
maxabs = absval;
idx = i;
}
}
maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */
if (maxabs > 32767) {
maxabs = MIN32(maxabs, 163838);
chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
SHR32(MULT32_32_32(maxabs, idx + 1), 2));
chirp_minus_one_Q16 = chirp_Q16 - 65536;
/* Apply bandwidth expansion. */
for (i = 0; i < p - 1; i++) {
lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
}
lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
} else {
break;
}
}
if (iter == 10) {
/* If the coeffs still do not fit into the 16 bit range after 10 iterations,
fall back to the A(z)=1 filter. */
OPUS_CLEAR(lpc, p);
_lpc[0] = 4096; /* Q12 */
} else {
for (i = 0; i < p; i++) {
_lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */
}
}
}
#endif
}
void celt_fir_c(
const opus_val16 *_x,
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *_y,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
SAVE_STACK;
celt_assert(x != y);
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
for (i=0;i<N-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(rnum, x+i, sum, ord, arch);
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
_y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
opus_val32 sum[4];
sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
y[i ] = ROUND16(sum[0], SIG_SHIFT);
y[i+1] = ROUND16(sum[1], SIG_SHIFT);
y[i+2] = ROUND16(sum[2], SIG_SHIFT);
y[i+3] = ROUND16(sum[3], SIG_SHIFT);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
sum = MAC16_16(sum,rnum[j],x[i+j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
y[i] = ROUND16(sum, SIG_SHIFT);
}
#endif
RESTORE_STACK;
}
@@ -166,7 +196,7 @@ void celt_iir(const opus_val32 *_x,
{
mem[j]=mem[j-1];
}
mem[0] = ROUND16(sum,SIG_SHIFT);
mem[0] = SROUND16(sum, SIG_SHIFT);
_y[i] = sum;
}
#else
@@ -195,20 +225,20 @@ void celt_iir(const opus_val32 *_x,
xcorr_kernel(rden, y+i, sum, ord, arch);
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
_y[i+1] = sum[1];
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
_y[i+2] = sum[2];
sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
_y[i+3] = sum[3];
}
for (;i<N;i++)
@@ -216,7 +246,7 @@ void celt_iir(const opus_val32 *_x,
opus_val32 sum = _x[i];
for (j=0;j<ord;j++)
sum -= MULT16_16(rden[j],y[i+j]);
y[i+ord] = ROUND16(sum,SIG_SHIFT);
y[i+ord] = SROUND16(sum,SIG_SHIFT);
_y[i] = sum;
}
for(i=0;i<ord;i++)
+2 -3
View File
@@ -45,12 +45,11 @@ void celt_fir_c(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if !defined(OVERRIDE_CELT_FIR)
#define celt_fir(x, num, y, N, ord, mem, arch) \
(celt_fir_c(x, num, y, N, ord, mem, arch))
#define celt_fir(x, num, y, N, ord, arch) \
(celt_fir_c(x, num, y, N, ord, arch))
#endif
void celt_iir(const opus_val32 *x,
+1 -1
View File
@@ -482,7 +482,7 @@ static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
k0=_k;
q=row[_n];
if(q>_i){
celt_assert(p>q);
celt_sig_assert(p>q);
_k=_n;
do p=CELT_PVQ_U_ROW[--_k][_n];
while(p>_i);
+4
View File
@@ -49,7 +49,11 @@
This macro should only be used for implementing ec_ilog(), if it is defined.
All other code should use EC_ILOG() instead.*/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#if defined(_MSC_VER) && (_MSC_VER >= 1910)
# include <intrin0.h> /* Improve compiler throughput. */
#else
# include <intrin.h>
#endif
/*In _DEBUG mode this is not an intrinsic by default.*/
# pragma intrinsic(_BitScanReverse)
+2 -2
View File
@@ -122,7 +122,7 @@ opus_uint32 ec_tell_frac(ec_ctx *_this);
/* Tested exhaustively for all n and for 1<=d<=256 */
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
celt_assert(d>0);
celt_sig_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (d>256)
return n/d;
@@ -138,7 +138,7 @@ static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
}
static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
celt_assert(d>0);
celt_sig_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (n<0)
return -(opus_int32)celt_udiv(-n, d);
+1 -1
View File
@@ -85,7 +85,7 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
The bits must have been encoded with ec_enc_uint().
No call to ec_dec_update() is necessary after this call.
_ft: The number of integers that can be decoded (one more than the max).
This must be at least one, and no more than 2**32-1.
This must be at least 2, and no more than 2**32-1.
Return: The decoded bits.*/
opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
+1 -1
View File
@@ -67,7 +67,7 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
/*Encodes a raw unsigned integer in the stream.
_fl: The integer to encode.
_ft: The number of integers that can be encoded (one more than the max).
This must be at least one, and no more than 2**32-1.*/
This must be at least 2, and no more than 2**32-1.*/
void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
/*Encodes a sequence of raw bits in the stream.
+55 -3
View File
@@ -59,6 +59,14 @@ extern opus_int64 celt_mips;
#define SHR(a,b) SHR32(a,b)
#define PSHR(a,b) PSHR32(a,b)
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
/** Negate 32-bit value, ignore any overflows */
#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
static OPUS_INLINE short NEG16(int x)
{
int res;
@@ -227,12 +235,11 @@ static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767)));
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
//#define SHR(a,shift) ((a) >> (shift))
//#define SHL(a,shift) ((a) << (shift))
#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
{
@@ -403,6 +410,51 @@ static OPUS_INLINE short MULT16_16_16(int a, int b)
return res;
}
/* result fits in 32 bits */
static OPUS_INLINE int MULT32_32_32(opus_int64 a, opus_int64 b)
{
opus_int64 res;
if (!VERIFY_INT(a) || !VERIFY_INT(b))
{
fprintf (stderr, "MULT32_32_32: inputs are not int: %d %d\n", a, b);
#ifdef FIXED_DEBUG_ASSERT
celt_assert(0);
#endif
}
res = a*b;
if (!VERIFY_INT(res))
{
fprintf (stderr, "MULT32_32_32: output is not int: %d\n", res);
#ifdef FIXED_DEBUG_ASSERT
celt_assert(0);
#endif
}
celt_mips+=5;
return res;
}
static OPUS_INLINE int MULT32_32_Q16(opus_int64 a, opus_int64 b)
{
opus_int64 res;
if (!VERIFY_INT(a) || !VERIFY_INT(b))
{
fprintf (stderr, "MULT32_32_Q16: inputs are not int: %d %d\n", a, b);
#ifdef FIXED_DEBUG_ASSERT
celt_assert(0);
#endif
}
res = ((opus_int64)(a)*(opus_int64)(b)) >> 16;
if (!VERIFY_INT(res))
{
fprintf (stderr, "MULT32_32_Q16: output is not int: %d*%d=%d\n", a, b, (int)res);
#ifdef FIXED_DEBUG_ASSERT
celt_assert(0);
#endif
}
celt_mips+=5;
return res;
}
#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
{
+22 -1
View File
@@ -57,6 +57,13 @@
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
#endif
/** 32x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),16))
#else
#define MULT32_32_Q16(a,b) (ADD32(ADD32(ADD32((opus_val32)(SHR32(((opus_uint32)((a)&0x0000ffff)*(opus_uint32)((b)&0x0000ffff)),16)), MULT16_16SU(SHR32(a,16),((b)&0x0000ffff))), MULT16_16SU(SHR32(b,16),((a)&0x0000ffff))), SHL32(MULT16_16(SHR32(a,16),SHR32(b,16)),16)))
#endif
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
@@ -102,8 +109,11 @@
#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
/** Shift by a and round-to-nearest 32-bit value. Result is a 16-bit value */
#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
/** Shift by a and round-to-nearest 32-bit value. Result is a saturated 16-bit value */
#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767));
/** Divide by two */
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
@@ -117,9 +127,20 @@
/** Subtract two 32-bit values */
#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
/** Negate 32-bit value, ignore any overflows */
#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
/** 16x16 multiplication where the result fits in 16 bits */
#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
/** 32x32 multiplication where the result fits in 32 bits */
#define MULT32_32_32(a,b) ((((opus_val32)(a))*((opus_val32)(b))))
/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */
/** 16x16 multiplication where the result fits in 32 bits */
#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b)))
+39 -27
View File
@@ -61,7 +61,45 @@
** the config.h file.
*/
#if (HAVE_LRINTF)
/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
#if defined(__GNUC__) && defined(__SSE__)
#include <xmmintrin.h>
static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1))
#include <xmmintrin.h>
static OPUS_INLINE opus_int32 float2int(float value)
{
/* _mm_load_ss will generate same code as _mm_set_ss
** in _MSC_VER >= 1914 /02 so keep __mm_load__ss
** for backward compatibility.
*/
return _mm_cvtss_si32(_mm_load_ss(&value));
}
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
#include <math.h>
/* Win32 doesn't seem to have these functions.
** Therefore implement OPUS_INLINE versions of these functions here.
*/
static OPUS_INLINE opus_int32
float2int (float flt)
{ int intgr;
_asm
{ fld flt
fistp intgr
} ;
return intgr ;
}
#elif defined(HAVE_LRINTF)
/* These defines enable functionality introduced with the 1999 ISO C
** standard. They must be defined before the inclusion of math.h to
@@ -90,32 +128,6 @@
#include <math.h>
#define float2int(x) lrint(x)
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64)
#include <xmmintrin.h>
__inline long int float2int(float value)
{
return _mm_cvtss_si32(_mm_load_ss(&value));
}
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
#include <math.h>
/* Win32 doesn't seem to have these functions.
** Therefore implement OPUS_INLINE versions of these functions here.
*/
__inline long int
float2int (float flt)
{ int intgr;
_asm
{ fld flt
fistp intgr
} ;
return intgr ;
}
#else
#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L)
+28 -28
View File
@@ -82,8 +82,8 @@ static void kf_bfly2(
C_SUB( Fout2[0] , Fout[0] , t );
C_ADDTO( Fout[0] , t );
t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
C_SUB( Fout2[1] , Fout[1] , t );
C_ADDTO( Fout[1] , t );
@@ -92,8 +92,8 @@ static void kf_bfly2(
C_SUB( Fout2[2] , Fout[2] , t );
C_ADDTO( Fout[2] , t );
t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
C_SUB( Fout2[3] , Fout[3] , t );
C_ADDTO( Fout[3] , t );
Fout += 8;
@@ -126,10 +126,10 @@ static void kf_bfly4(
C_ADDTO( *Fout , scratch1 );
C_SUB( scratch1 , Fout[1] , Fout[3] );
Fout[1].r = scratch0.r + scratch1.i;
Fout[1].i = scratch0.i - scratch1.r;
Fout[3].r = scratch0.r - scratch1.i;
Fout[3].i = scratch0.i + scratch1.r;
Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
Fout+=4;
}
} else {
@@ -160,10 +160,10 @@ static void kf_bfly4(
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = scratch[5].r + scratch[4].i;
Fout[m].i = scratch[5].i - scratch[4].r;
Fout[m3].r = scratch[5].r - scratch[4].i;
Fout[m3].i = scratch[5].i + scratch[4].r;
Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
++Fout;
}
}
@@ -212,18 +212,18 @@ static void kf_bfly3(
tw1 += fstride;
tw2 += fstride*2;
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
C_MULBYSCALAR( scratch[0] , epi3.i );
C_ADDTO(*Fout,scratch[3]);
Fout[m2].r = Fout[m].r + scratch[0].i;
Fout[m2].i = Fout[m].i - scratch[0].r;
Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
Fout[m].r -= scratch[0].i;
Fout[m].i += scratch[0].r;
Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
++Fout;
} while(--k);
@@ -282,22 +282,22 @@ static void kf_bfly5(
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
+3 -2
View File
@@ -38,7 +38,8 @@
#include "mathops.h"
/*Compute floor(sqrt(_val)) with exact arithmetic.
This has been tested on all possible 32-bit inputs.*/
_val must be greater than 0.
This has been tested on all possible 32-bit inputs greater than 0.*/
unsigned isqrt32(opus_uint32 _val){
unsigned b;
unsigned g;
@@ -182,7 +183,7 @@ opus_val32 celt_rcp(opus_val32 x)
int i;
opus_val16 n;
opus_val16 r;
celt_assert2(x>0, "celt_rcp() only defined for positive values");
celt_sig_assert(x>0);
i = celt_ilog2(x);
/* n is Q15 with range [0,1). */
n = VSHR32(x,i-15)-32768;
+36 -4
View File
@@ -38,11 +38,44 @@
#include "entcode.h"
#include "os_support.h"
#define PI 3.141592653f
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
unsigned isqrt32(opus_uint32 _val);
/* CELT doesn't need it for fixed-point, by analysis.c does. */
#if !defined(FIXED_POINT) || defined(ANALYSIS_C)
#define cA 0.43157974f
#define cB 0.67848403f
#define cC 0.08595542f
#define cE ((float)PI/2)
static OPUS_INLINE float fast_atan2f(float y, float x) {
float x2, y2;
x2 = x*x;
y2 = y*y;
/* For very small values, we don't care about the answer, so
we can just return 0. */
if (x2 + y2 < 1e-18f)
{
return 0;
}
if(x2<y2){
float den = (y2 + cB*x2) * (y2 + cC*x2);
return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
}else{
float den = (x2 + cB*y2) * (x2 + cC*y2);
return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
}
}
#undef cA
#undef cB
#undef cC
#undef cE
#endif
#ifndef OVERRIDE_CELT_MAXABS16
static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
{
@@ -80,7 +113,6 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
#ifndef FIXED_POINT
#define PI 3.141592653f
#define celt_sqrt(x) ((float)sqrt(x))
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
@@ -105,7 +137,7 @@ static OPUS_INLINE float celt_log2(float x)
} in;
in.f = x;
integer = (in.i>>23)-127;
in.i -= integer<<23;
in.i -= (opus_uint32)integer<<23;
frac = in.f - 1.5f;
frac = -0.41445418f + frac*(0.95909232f
+ frac*(-0.33951290f + frac*0.16541097f));
@@ -128,7 +160,7 @@ static OPUS_INLINE float celt_exp2(float x)
/* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
res.f = 0.99992522f + frac * (0.69583354f
+ frac * (0.22606716f + 0.078024523f*frac));
res.i = (res.i + (integer<<23)) & 0x7fffffff;
res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff;
return res.f;
}
@@ -147,7 +179,7 @@ static OPUS_INLINE float celt_exp2(float x)
/** Integer log in base2. Undefined for zero and negative numbers */
static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
{
celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
celt_sig_assert(x>0);
return EC_ILOG(x)-1;
}
#endif
+8 -8
View File
@@ -270,8 +270,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
int rev;
kiss_fft_scalar yr, yi;
rev = *bitrev++;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
@@ -301,8 +301,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
@@ -312,8 +312,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
@@ -333,8 +333,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
wp1++;
wp2--;
}
+1
View File
@@ -53,6 +53,7 @@
#include "celt_lpc.h"
#include "vq.h"
#define OVERRIDE_COMB_FILTER_CONST
#define OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+1 -10
View File
@@ -36,11 +36,6 @@
#include "mathops.h"
#include "arch.h"
static unsigned extract_collapse_mask(int *iy, int N, int B);
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
#define OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
@@ -69,11 +64,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
}
#define OVERRIDE_renormalise_vector
#define renormalise_vector(X, N, gain, arch) \
(renormalise_vector_mips(X, N, gain, arch))
void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch)
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
int i;
#ifdef FIXED_POINT
+1 -1
View File
@@ -427,7 +427,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
}
#endif /* CUSTOM_MODES_ONLY */
opus_free((opus_int16*)mode->eBands);
opus_free((opus_int16*)mode->allocVectors);
opus_free((unsigned char*)mode->allocVectors);
opus_free((opus_val16*)mode->window);
opus_free((opus_int16*)mode->logN);
-1
View File
@@ -39,7 +39,6 @@
#include "opus_defines.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+13 -33
View File
@@ -102,11 +102,9 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
}
}
static void celt_fir5(const opus_val16 *x,
static void celt_fir5(opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
opus_val16 *mem)
int N)
{
int i;
opus_val16 num0, num1, num2, num3, num4;
@@ -116,11 +114,11 @@ static void celt_fir5(const opus_val16 *x,
num2=num[2];
num3=num[3];
num4=num[4];
mem0=mem[0];
mem1=mem[1];
mem2=mem[2];
mem3=mem[3];
mem4=mem[4];
mem0=0;
mem1=0;
mem2=0;
mem3=0;
mem4=0;
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
@@ -134,13 +132,8 @@ static void celt_fir5(const opus_val16 *x,
mem2 = mem1;
mem1 = mem0;
mem0 = x[i];
y[i] = ROUND16(sum, SIG_SHIFT);
x[i] = ROUND16(sum, SIG_SHIFT);
}
mem[0]=mem0;
mem[1]=mem1;
mem[2]=mem2;
mem[3]=mem3;
mem[4]=mem4;
}
@@ -150,7 +143,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
int i;
opus_val32 ac[5];
opus_val16 tmp=Q15ONE;
opus_val16 lpc[4], mem[5]={0,0,0,0,0};
opus_val16 lpc[4];
opus_val16 lpc2[5];
opus_val16 c1 = QCONST16(.8f,15);
#ifdef FIXED_POINT
@@ -211,7 +204,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
celt_fir5(x_lp, lpc2, len>>1);
}
/* Pure C implementation. */
@@ -220,13 +213,8 @@ opus_val32
#else
void
#endif
#if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{
#if 0 /* This is a simple version of the pitch correlation that should work
@@ -261,15 +249,11 @@ celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 maxcorr=1;
#endif
celt_assert(max_pitch>0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@@ -285,11 +269,7 @@ celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
for (;i<max_pitch;i++)
{
opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@@ -378,7 +358,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
for (j=0;j<len>>1;j++)
sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
#else
sum = celt_inner_prod_c(x_lp, y+i, len>>1);
sum = celt_inner_prod(x_lp, y+i, len>>1, arch);
#endif
xcorr[i] = MAX32(-1, sum);
#ifdef FIXED_POINT
@@ -424,7 +404,7 @@ static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy
sx = celt_ilog2(xx)-14;
sy = celt_ilog2(yy)-14;
shift = sx + sy;
x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
if (shift & 1) {
if (x2y2 < 32768)
{
+3 -11
View File
@@ -46,8 +46,7 @@
#include "mips/pitch_mipsr1.h"
#endif
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
|| defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@@ -184,17 +183,10 @@ opus_val32
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
#ifndef OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr celt_pitch_xcorr_c
#endif
#endif
+9 -2
View File
@@ -418,6 +418,7 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
#endif
oldEBands[i+c*m->nbEBands] += offset;
error[i+c*m->nbEBands] -= offset;
bits_left--;
} while (++c < C);
}
@@ -456,7 +457,7 @@ void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *ol
/* It would be better to express this invariant as a
test on C at function entry, but that isn't enough
to make the static analyzer happy. */
celt_assert(c<2);
celt_sig_assert(c<2);
tell = ec_tell(dec);
if(budget-tell>=15)
{
@@ -547,9 +548,15 @@ void amp2Log2(const CELTMode *m, int effEnd, int end,
c=0;
do {
for (i=0;i<effEnd;i++)
{
bandLogE[i+c*m->nbEBands] =
celt_log2(SHL32(bandE[i+c*m->nbEBands],2))
celt_log2(bandE[i+c*m->nbEBands])
- SHL16((opus_val16)eMeans[i],6);
#ifdef FIXED_POINT
/* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
#endif
}
for (i=effEnd;i<end;i++)
bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
} while (++c < C);
+10 -14
View File
@@ -348,12 +348,17 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/*This if() block is the only part of the allocation function that
is not a mandatory part of the bitstream: any bands we choose to
skip here must be explicitly signaled.*/
/*Choose a threshold with some hysteresis to keep bands from
fluctuating in and out.*/
int depth_threshold;
/*We choose a threshold with some hysteresis to keep bands from
fluctuating in and out, but we try not to fold below a certain point. */
if (codedBands > 17)
depth_threshold = j<prev ? 7 : 9;
else
depth_threshold = 0;
#ifdef FUZZING
if ((rand()&0x1) == 0)
#else
if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
#endif
{
ec_enc_bit_logp(ec, 1, 1);
@@ -524,13 +529,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
return codedBands;
}
#if !defined(__clang__) && defined(__GNUC__) && defined(__arm__) && \
__GNUC__ == 4 && __GNUC_MINOR__ == 8
#warning "OPUS library causes an internal compiler error for gcc-4.8 based toolchain in arm"
#pragma GCC push_options
#pragma GCC optimize ("O0")
#endif
int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
{
int lo, hi, len, j;
@@ -642,7 +641,4 @@ int compute_allocation(const CELTMode *m, int start, int end, const int *offsets
RESTORE_STACK;
return codedBands;
}
#if !defined(__clang__) && defined(__GNUC__) && defined(__arm__) && \
__GNUC__ == 4 && __GNUC_MINOR__ == 8
#pragma GCC pop_options
#endif
+1 -1
View File
@@ -95,7 +95,7 @@ static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int puls
@param pulses Number of pulses per band (returned)
@return Total number of bits allocated
*/
int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
#endif
+2 -2
View File
@@ -40,7 +40,7 @@
#endif
#ifdef USE_ALLOCA
# ifdef WIN32
# ifdef _WIN32
# include <malloc.h>
# else
# ifdef HAVE_ALLOCA_H
@@ -102,7 +102,7 @@
#define VARDECL(type, var) type *var
# ifdef WIN32
# ifdef _WIN32
# define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
# else
# define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
@@ -1,7 +1,7 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#include <NE10_types.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
@@ -1,7 +1,7 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#include <NE10_types.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
+76 -42
View File
@@ -39,6 +39,10 @@
#include "rate.h"
#include "pitch.h"
#if defined(MIPSr1_ASM)
#include "mips/vq_mipsr1.h"
#endif
#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
@@ -67,7 +71,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
}
#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
static const int SPREAD_FACTOR[3]={15,10,5};
int i;
@@ -158,42 +162,27 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
return collapse_mask;
}
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
)
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
VARDECL(celt_norm, y);
VARDECL(int, iy);
VARDECL(opus_val16, signx);
VARDECL(int, signx);
int i, j;
opus_val16 s;
int pulsesLeft;
opus_val32 sum;
opus_val32 xy;
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
(void)arch;
ALLOC(y, N, celt_norm);
ALLOC(iy, N, int);
ALLOC(signx, N, opus_val16);
exp_rotation(X, N, 1, B, K, spread);
ALLOC(signx, N, int);
/* Get rid of the sign */
sum = 0;
j=0; do {
if (X[j]>0)
signx[j]=1;
else {
signx[j]=-1;
X[j]=-X[j];
}
signx[j] = X[j]<0;
/* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
X[j] = ABS16(X[j]);
iy[j] = 0;
y[j] = 0;
} while (++j<N);
@@ -225,7 +214,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
while (++j<N);
sum = QCONST16(1.f,14);
}
rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
#ifdef FIXED_POINT
rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
j=0; do {
#ifdef FIXED_POINT
/* It's really important to round *towards zero* here */
@@ -240,12 +234,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft -= iy[j];
} while (++j<N);
}
celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
celt_sig_assert(pulsesLeft>=0);
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
celt_sig_assert(pulsesLeft<=N+3);
#endif
if (pulsesLeft > N+3)
{
@@ -256,12 +250,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft=0;
}
s = 1;
for (i=0;i<pulsesLeft;i++)
{
opus_val16 Rxy, Ryy;
int best_id;
opus_val32 best_num = -VERY_LARGE16;
opus_val16 best_den = 0;
opus_val32 best_num;
opus_val16 best_den;
#ifdef FIXED_POINT
int rshift;
#endif
@@ -272,9 +266,22 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD16(yy, 1);
j=0;
/* Calculations for position 0 are out of the loop, in part to reduce
mispredicted branches (since the if condition is usually false)
in the loop. */
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy = ADD16(yy, y[0]);
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
Rxy is positive because the sign is pre-computed) */
Rxy = MULT16_16_Q15(Rxy,Rxy);
best_den = Ryy;
best_num = Rxy;
j=1;
do {
opus_val16 Rxy, Ryy;
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
@@ -285,8 +292,11 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
Rxy = MULT16_16_Q15(Rxy,Rxy);
/* The idea is to check for num/den >= best_num/best_den, but that way
we can do it without any division */
/* OPT: Make sure to use conditional moves here */
if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
/* OPT: It's not clear whether a cmov is faster than a branch here
since the condition is more often false than true and using
a cmov introduces data dependencies across iterations. The optimal
choice may be architecture-dependent. */
if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
{
best_den = Ryy;
best_num = Rxy;
@@ -301,23 +311,47 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2*s;
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
j=0;
do {
X[j] = MULT16_16(signx[j],X[j]);
if (signx[j] < 0)
iy[j] = -iy[j];
/*iy[j] = signx[j] ? -iy[j] : iy[j];*/
/* OPT: The is more likely to be compiled without a branch than the code above
but has the same performance otherwise. */
iy[j] = (iy[j]^-signx[j]) + signx[j];
} while (++j<N);
RESTORE_STACK;
return yy;
}
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch)
{
VARDECL(int, iy);
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
/* Covers vectorization by up to 4. */
ALLOC(iy, N+3, int);
exp_rotation(X, N, 1, B, K, spread);
yy = op_pvq_search(X, iy, K, N, arch);
encode_pulses(iy, N, K, enc);
#ifdef RESYNTH
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
#endif
if (resynth)
{
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
}
collapse_mask = extract_collapse_mask(iy, N, B);
RESTORE_STACK;
@@ -401,7 +435,7 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int
/* 0.63662 = 2/pi */
itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
#endif
return itheta;
+12 -8
View File
@@ -37,10 +37,18 @@
#include "entdec.h"
#include "modes.h"
#if defined(MIPSr1_ASM)
#include "mips/vq_mipsr1.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT))
#include "x86/vq_sse.h"
#endif
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
#if !defined(OVERRIDE_OP_PVQ_SEARCH)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_c(x, iy, K, N, arch))
#endif
/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
* the pitch and a combination of pulses such that its norm is still equal
@@ -51,12 +59,8 @@
* @param enc Entropy encoder state
* @ret A mask indicating which blocks in the band received pulses
*/
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
);
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch);
/** Algebraic pulse decoder
* @param X Decoded normalised spectrum (returned)
+4 -6
View File
@@ -41,12 +41,11 @@ void celt_fir_sse4_1(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if defined(OPUS_X86_PRESUME_SSE4_1)
#define celt_fir(x, num, y, N, ord, mem, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
#define celt_fir(x, num, y, N, ord, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
#else
@@ -56,11 +55,10 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
# define celt_fir(x, num, y, N, ord, mem, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
# define celt_fir(x, num, y, N, ord, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
#endif
#endif
@@ -40,65 +40,23 @@
#if defined(FIXED_POINT)
void celt_fir_sse4_1(const opus_val16 *_x,
void celt_fir_sse4_1(const opus_val16 *x,
const opus_val16 *num,
opus_val16 *_y,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
__m128i vecNoA;
opus_int32 noA ;
SAVE_STACK;
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N-7;i+=8)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
x[i+ord+4]=_x[i+4];
x[i+ord+5]=_x[i+5];
x[i+ord+6]=_x[i+6];
x[i+ord+7]=_x[i+7];
}
for (;i<N-3;i+=4)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
}
for (;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
noA = EXTEND32(1) << SIG_SHIFT >> 1;
vecNoA = _mm_set_epi32(noA, noA, noA, noA);
@@ -107,25 +65,24 @@ void celt_fir_sse4_1(const opus_val16 *_x,
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
xcorr_kernel(rnum, x+i, sums, ord, arch);
xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
vecX = OP_CVTEPI16_EPI32_M64(_x + i);
vecX = OP_CVTEPI16_EPI32_M64(x + i);
vecSum = _mm_add_epi32(vecSum, vecX);
vecSum = _mm_packs_epi32(vecSum, vecSum);
_mm_storel_epi64((__m128i *)(_y + i), vecSum);
_mm_storel_epi64((__m128i *)(y + i), vecSum);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
for (j=0;j<ord;j++)
sum = MAC16_16(sum, rnum[j], x[i + j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
}
#endif
RESTORE_STACK;
}
+1 -1
View File
@@ -91,7 +91,7 @@ opus_val32 celt_inner_prod_sse2(
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
const opus_val16 *x,
const opus_val16 *y,
+48 -3
View File
@@ -117,6 +117,14 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32
__m128i sum0, sum1, sum2, sum3, vecSum;
__m128i initSum;
#ifdef OPUS_CHECK_ASM
opus_val32 sum_c[4];
for (j=0;j<4;j++) {
sum_c[j] = sum[j];
}
xcorr_kernel_c(x, y, sum_c, len);
#endif
celt_assert(len >= 3);
sum0 = _mm_setzero_si128();
@@ -177,19 +185,56 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32
vecSum = _mm_add_epi32(vecSum, sum2);
}
for (;j<len;j++)
vecX = OP_CVTEPI16_EPI32_M64(&x[len - 4]);
if (len - j == 3)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecX0 = _mm_shuffle_epi32(vecX, 0x55);
vecX1 = _mm_shuffle_epi32(vecX, 0xaa);
vecX2 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);
sum2 = _mm_mullo_epi32(vecX2, vecY2);
vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum1);
vecSum = _mm_add_epi32(vecSum, sum2);
}
else if (len - j == 2)
{
vecX0 = _mm_shuffle_epi32(vecX, 0xaa);
vecX1 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);
vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum1);
}
else if (len - j == 1)
{
vecX0 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
vecSum = _mm_add_epi32(vecSum, sum0);
}
initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
initSum = _mm_add_epi32(initSum, vecSum);
_mm_storeu_si128((__m128i *)sum, initSum);
#ifdef OPUS_CHECK_ASM
celt_assert(!memcmp(sum_c, sum, sizeof(sum_c)));
#endif
}
#endif
+50
View File
@@ -0,0 +1,50 @@
/* Copyright (c) 2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef VQ_SSE_H
#define VQ_SSE_H
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#define OVERRIDE_OP_PVQ_SEARCH
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
#if defined(OPUS_X86_PRESUME_SSE2)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_sse2(x, iy, K, N, arch))
#else
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch);
# define op_pvq_search(X, iy, K, N, arch) \
((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))
#endif
#endif
#endif
+217
View File
@@ -0,0 +1,217 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Copyright (c) 2007-2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "vq.h"
#include "x86cpu.h"
#ifndef FIXED_POINT
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
int i, j;
int pulsesLeft;
float xy, yy;
VARDECL(celt_norm, y);
VARDECL(celt_norm, X);
VARDECL(float, signy);
__m128 signmask;
__m128 sums;
__m128i fours;
SAVE_STACK;
(void)arch;
/* All bits set to zero, except for the sign bit. */
signmask = _mm_set_ps1(-0.f);
fours = _mm_set_epi32(4, 4, 4, 4);
ALLOC(y, N+3, celt_norm);
ALLOC(X, N+3, celt_norm);
ALLOC(signy, N+3, float);
OPUS_COPY(X, _X, N);
X[N] = X[N+1] = X[N+2] = 0;
sums = _mm_setzero_ps();
for (j=0;j<N;j+=4)
{
__m128 x4, s4;
x4 = _mm_loadu_ps(&X[j]);
s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
/* Get rid of the sign */
x4 = _mm_andnot_ps(signmask, x4);
sums = _mm_add_ps(sums, x4);
/* Clear y and iy in case we don't do the projection. */
_mm_storeu_ps(&y[j], _mm_setzero_ps());
_mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
_mm_storeu_ps(&X[j], x4);
_mm_storeu_ps(&signy[j], s4);
}
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2)));
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1)));
xy = yy = 0;
pulsesLeft = K;
/* Do a pre-search by projecting on the pyramid */
if (K > (N>>1))
{
__m128i pulses_sum;
__m128 yy4, xy4;
__m128 rcp4;
opus_val32 sum = _mm_cvtss_f32(sums);
/* If X is too small, just replace it with a pulse at 0 */
/* Prevents infinities and NaNs from causing too many pulses
to be allocated. 64 is an approximation of infinity here. */
if (!(sum > EPSILON && sum < 64))
{
X[0] = QCONST16(1.f,14);
j=1; do
X[j]=0;
while (++j<N);
sums = _mm_set_ps1(1.f);
}
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums));
xy4 = yy4 = _mm_setzero_ps();
pulses_sum = _mm_setzero_si128();
for (j=0;j<N;j+=4)
{
__m128 rx4, x4, y4;
__m128i iy4;
x4 = _mm_loadu_ps(&X[j]);
rx4 = _mm_mul_ps(x4, rcp4);
iy4 = _mm_cvttps_epi32(rx4);
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
_mm_storeu_si128((__m128i*)&iy[j], iy4);
y4 = _mm_cvtepi32_ps(iy4);
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
/* double the y[] vector so we don't have to do it in the search loop. */
_mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
}
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2)));
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1)));
pulsesLeft -= _mm_cvtsi128_si32(pulses_sum);
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2)));
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
xy = _mm_cvtss_f32(xy4);
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
yy = _mm_cvtss_f32(yy4);
}
X[N] = X[N+1] = X[N+2] = -100;
y[N] = y[N+1] = y[N+2] = 100;
celt_sig_assert(pulsesLeft>=0);
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
if (pulsesLeft > N+3)
{
opus_val16 tmp = (opus_val16)pulsesLeft;
yy = MAC16_16(yy, tmp, tmp);
yy = MAC16_16(yy, tmp, y[0]);
iy[0] += pulsesLeft;
pulsesLeft=0;
}
for (i=0;i<pulsesLeft;i++)
{
int best_id;
__m128 xy4, yy4;
__m128 max, max2;
__m128i count;
__m128i pos;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD16(yy, 1);
xy4 = _mm_load1_ps(&xy);
yy4 = _mm_load1_ps(&yy);
max = _mm_setzero_ps();
pos = _mm_setzero_si128();
count = _mm_set_epi32(3, 2, 1, 0);
for (j=0;j<N;j+=4)
{
__m128 x4, y4, r4;
x4 = _mm_loadu_ps(&X[j]);
y4 = _mm_loadu_ps(&y[j]);
x4 = _mm_add_ps(x4, xy4);
y4 = _mm_add_ps(y4, yy4);
y4 = _mm_rsqrt_ps(y4);
r4 = _mm_mul_ps(x4, y4);
/* Update the index of the max. */
pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
/* Update the max. */
max = _mm_max_ps(max, r4);
/* Update the indices (+4) */
count = _mm_add_epi32(count, fours);
}
/* Horizontal max */
max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
/* Now that max2 contains the max at all positions, look at which value(s) of the
partial max is equal to the global max. */
pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
best_id = _mm_cvtsi128_si32(pos);
/* Updating the sums of the new pulse(s) */
xy = ADD32(xy, EXTEND32(X[best_id]));
/* We're multiplying y[j] by two so we don't have to do it here */
yy = ADD16(yy, y[best_id]);
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
for (j=0;j<N;j+=4)
{
__m128i y4;
__m128i s4;
y4 = _mm_loadu_si128((__m128i*)&iy[j]);
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
_mm_storeu_si128((__m128i*)&iy[j], y4);
}
RESTORE_STACK;
return yy;
}
#endif
+13 -1
View File
@@ -33,6 +33,7 @@
#include "celt_lpc.h"
#include "pitch.h"
#include "pitch_sse.h"
#include "vq.h"
#if defined(OPUS_HAVE_RTCD)
@@ -46,7 +47,6 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch
) = {
celt_fir_c, /* non-sse */
@@ -151,5 +151,17 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch
) = {
op_pvq_search_c, /* non-sse */
op_pvq_search_c,
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search)
};
#endif
#endif
#endif
+2 -30
View File
@@ -56,38 +56,10 @@
int opus_select_arch(void);
# endif
/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
reference, these require 16-byte alignment and load a full 16 bytes (instead
of 4 or 8), possibly reading out of bounds.
We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
_mm_loadl_epi64(), which should have the same semantics as an m32 or m64
reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
optimize this out when optimizations ARE enabled.
Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
(which is fair, since technically the compiler is always allowed to do the
dereference before invoking the function implementing the intrinsic).
However, it is smart enough to eliminate the extra MOVD instruction.
For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
the extra MOVQ if it's specified explicitly */
# if defined(__clang__) || !defined(__OPTIMIZE__)
# define OP_CVTEPI8_EPI32_M32(x) \
#define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
# define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(*(__m128i *)(x)))
#endif
# if !defined(__OPTIMIZE__)
# define OP_CVTEPI16_EPI32_M64(x) \
#define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
# define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif
#endif
-39
View File
@@ -1,39 +0,0 @@
From 609166a46f6a22ae2d0a0ab7c64415c779c65f37 Mon Sep 17 00:00:00 2001
From: Juan Gomez <atilag@gmail.com>
Date: Wed, 26 Nov 2014 23:57:49 +0100
Subject: [PATCH] Bug 1056337 - Upgrade toolchain used for B2G ICS builds *
Patch for gcc ICE in OPUS library (arm)
---
media/libopus/celt/rate.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/media/libopus/celt/rate.c b/media/libopus/celt/rate.c
index e13d839..1055e63 100644
--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@@ -523,6 +523,12 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
return codedBands;
}
+#if !defined(__clang__) && defined(__GNUC__) && defined(__arm__) && \
+ __GNUC__ == 4 && __GNUC_MINOR__ == 8
+#warning "OPUS library causes an internal compiler error for gcc-4.8 based toolchain in arm"
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+#endif
int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
{
@@ -635,4 +641,7 @@ int compute_allocation(const CELTMode *m, int start, int end, const int *offsets
RESTORE_STACK;
return codedBands;
}
-
+#if !defined(__clang__) && defined(__GNUC__) && defined(__arm__) && \
+ __GNUC__ == 4 && __GNUC_MINOR__ == 8
+#pragma GCC pop_options
+#endif
--
2.1.0
+1 -1
View File
@@ -64,7 +64,7 @@ def generate_sources_mozbuild(path):
if __name__ == '__main__':
if len(sys.argv) != 2:
print "Usage: %s /path/to/opus" % (sys.argv[0])
print("Usage: %s /path/to/opus" % (sys.argv[0]))
sys.exit(1)
generate_sources_mozbuild(sys.argv[1])
+1 -1
View File
@@ -531,7 +531,7 @@ OPUS_EXPORT int opus_packet_parse(
const unsigned char *frames[48],
opus_int16 size[48],
int *payload_offset
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5);
/** Gets the bandwidth of an Opus packet.
* @param [in] data <tt>char*</tt>: Opus packet
+2 -2
View File
@@ -178,7 +178,7 @@ OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encode
) OPUS_ARG_NONNULL(1);
/** Destroys a an encoder state.
/** Destroys an encoder state.
* @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed.
*/
OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st);
@@ -286,7 +286,7 @@ OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decode
int *error
) OPUS_ARG_NONNULL(1);
/** Destroys a an decoder state.
/** Destroys a decoder state.
* @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed.
*/
OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st);
+50 -4
View File
@@ -64,7 +64,7 @@ extern "C" {
/**Export control for opus functions */
#ifndef OPUS_EXPORT
# if defined(WIN32)
# if defined(_WIN32)
# if defined(OPUS_BUILD) && defined(DLL_EXPORT)
# define OPUS_EXPORT __declspec(dllexport)
# else
@@ -165,8 +165,13 @@ extern "C" {
#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
#define OPUS_GET_IN_DTX_REQUEST 4049
/** Defines for the presence of extended APIs. */
#define OPUS_HAVE_OPUS_PROJECTION_H
/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
@@ -208,6 +213,9 @@ extern "C" {
#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
#define OPUS_FRAMESIZE_80_MS 5007 /**< Use 80 ms frames */
#define OPUS_FRAMESIZE_100_MS 5008 /**< Use 100 ms frames */
#define OPUS_FRAMESIZE_120_MS 5009 /**< Use 120 ms frames */
/**@}*/
@@ -566,7 +574,9 @@ extern "C" {
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
@@ -581,7 +591,9 @@ extern "C" {
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
@@ -681,6 +693,40 @@ extern "C" {
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/** If set to 1, disables the use of phase inversion for intensity stereo,
* improving the quality of mono downmixes, but slightly reducing normal
* stereo quality. Disabling phase inversion in the decoder does not comply
* with RFC 6716, although it does not cause any interoperability issue and
* is expected to become part of the Opus standard once RFC 6716 is updated
* by draft-ietf-codec-opus-update.
* @see OPUS_GET_PHASE_INVERSION_DISABLED
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>0</dt><dd>Enable phase inversion (default).</dd>
* <dt>1</dt><dd>Disable phase inversion.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_PHASE_INVERSION_DISABLED(x) OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured phase inversion status.
* @see OPUS_SET_PHASE_INVERSION_DISABLED
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>Stereo phase inversion enabled (default).</dd>
* <dt>1</dt><dd>Stereo phase inversion disabled.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_PHASE_INVERSION_DISABLED(x) OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int_ptr(x)
/** Gets the DTX state of the encoder.
* Returns whether the last encoded frame was either a comfort noise update
* during DTX or not encoded because of DTX.
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>The encoder is not in DTX.</dd>
* <dt>1</dt><dd>The encoder is in DTX.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_IN_DTX(x) OPUS_GET_IN_DTX_REQUEST, __opus_check_int_ptr(x)
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs
+2 -2
View File
@@ -273,7 +273,7 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_enc
unsigned char *mapping,
int application,
int *error
) OPUS_ARG_NONNULL(5);
) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6);
/** Initialize a previously allocated multistream encoder state.
* The memory pointed to by \a st must be at least the size returned by
@@ -342,7 +342,7 @@ OPUS_EXPORT int opus_multistream_surround_encoder_init(
int *coupled_streams,
unsigned char *mapping,
int application
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6) OPUS_ARG_NONNULL(7);
/** Encodes a multistream Opus frame.
* @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+568
View File
@@ -0,0 +1,568 @@
/* Copyright (c) 2017 Google Inc.
Written by Andrew Allen */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file opus_projection.h
* @brief Opus projection reference API
*/
#ifndef OPUS_PROJECTION_H
#define OPUS_PROJECTION_H
#include "opus_multistream.h"
#ifdef __cplusplus
extern "C" {
#endif
/** @cond OPUS_INTERNAL_DOC */
/** These are the actual encoder and decoder CTL ID numbers.
* They should not be used directly by applications.c
* In general, SETs should be even and GETs should be odd.*/
/**@{*/
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST 6001
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST 6003
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST 6005
/**@}*/
/** @endcond */
/** @defgroup opus_projection_ctls Projection specific encoder and decoder CTLs
*
* These are convenience macros that are specific to the
* opus_projection_encoder_ctl() and opus_projection_decoder_ctl()
* interface.
* The CTLs from @ref opus_genericctls, @ref opus_encoderctls,
* @ref opus_decoderctls, and @ref opus_multistream_ctls may be applied to a
* projection encoder or decoder as well.
*/
/**@{*/
/** Gets the gain (in dB. S7.8-format) of the demixing matrix from the encoder.
* @param[out] x <tt>opus_int32 *</tt>: Returns the gain (in dB. S7.8-format)
* of the demixing matrix.
* @hideinitializer
*/
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST, __opus_check_int_ptr(x)
/** Gets the size in bytes of the demixing matrix from the encoder.
* @param[out] x <tt>opus_int32 *</tt>: Returns the size in bytes of the
* demixing matrix.
* @hideinitializer
*/
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST, __opus_check_int_ptr(x)
/** Copies the demixing matrix to the supplied pointer location.
* @param[out] x <tt>unsigned char *</tt>: Returns the demixing matrix to the
* supplied pointer location.
* @param y <tt>opus_int32</tt>: The size in bytes of the reserved memory at the
* pointer location.
* @hideinitializer
*/
#define OPUS_PROJECTION_GET_DEMIXING_MATRIX(x,y) OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST, x, __opus_check_int(y)
/**@}*/
/** Opus projection encoder state.
* This contains the complete state of a projection Opus encoder.
* It is position independent and can be freely copied.
* @see opus_projection_ambisonics_encoder_create
*/
typedef struct OpusProjectionEncoder OpusProjectionEncoder;
/** Opus projection decoder state.
* This contains the complete state of a projection Opus decoder.
* It is position independent and can be freely copied.
* @see opus_projection_decoder_create
* @see opus_projection_decoder_init
*/
typedef struct OpusProjectionDecoder OpusProjectionDecoder;
/**\name Projection encoder functions */
/**@{*/
/** Gets the size of an OpusProjectionEncoder structure.
* @param channels <tt>int</tt>: The total number of input channels to encode.
* This must be no more than 255.
* @param mapping_family <tt>int</tt>: The mapping family to use for selecting
* the appropriate projection.
* @returns The size in bytes on success, or a negative error code
* (see @ref opus_errorcodes) on error.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_ambisonics_encoder_get_size(
int channels,
int mapping_family
);
/** Allocates and initializes a projection encoder state.
* Call opus_projection_encoder_destroy() to release
* this object when finished.
* @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
* This must be one of 8000, 12000, 16000,
* 24000, or 48000.
* @param channels <tt>int</tt>: Number of channels in the input signal.
* This must be at most 255.
* It may be greater than the number of
* coded channels (<code>streams +
* coupled_streams</code>).
* @param mapping_family <tt>int</tt>: The mapping family to use for selecting
* the appropriate projection.
* @param[out] streams <tt>int *</tt>: The total number of streams that will
* be encoded from the input.
* @param[out] coupled_streams <tt>int *</tt>: Number of coupled (2 channel)
* streams that will be encoded from the input.
* @param application <tt>int</tt>: The target encoder application.
* This must be one of the following:
* <dl>
* <dt>#OPUS_APPLICATION_VOIP</dt>
* <dd>Process signal for improved speech intelligibility.</dd>
* <dt>#OPUS_APPLICATION_AUDIO</dt>
* <dd>Favor faithfulness to the original input.</dd>
* <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
* <dd>Configure the minimum possible coding delay by disabling certain modes
* of operation.</dd>
* </dl>
* @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
* code (see @ref opus_errorcodes) on
* failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionEncoder *opus_projection_ambisonics_encoder_create(
opus_int32 Fs,
int channels,
int mapping_family,
int *streams,
int *coupled_streams,
int application,
int *error
) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5);
/** Initialize a previously allocated projection encoder state.
* The memory pointed to by \a st must be at least the size returned by
* opus_projection_ambisonics_encoder_get_size().
* This is intended for applications which use their own allocator instead of
* malloc.
* To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
* @see opus_projection_ambisonics_encoder_create
* @see opus_projection_ambisonics_encoder_get_size
* @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to initialize.
* @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
* This must be one of 8000, 12000, 16000,
* 24000, or 48000.
* @param channels <tt>int</tt>: Number of channels in the input signal.
* This must be at most 255.
* It may be greater than the number of
* coded channels (<code>streams +
* coupled_streams</code>).
* @param streams <tt>int</tt>: The total number of streams to encode from the
* input.
* This must be no more than the number of channels.
* @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
* to encode.
* This must be no larger than the total
* number of streams.
* Additionally, The total number of
* encoded channels (<code>streams +
* coupled_streams</code>) must be no
* more than the number of input channels.
* @param application <tt>int</tt>: The target encoder application.
* This must be one of the following:
* <dl>
* <dt>#OPUS_APPLICATION_VOIP</dt>
* <dd>Process signal for improved speech intelligibility.</dd>
* <dt>#OPUS_APPLICATION_AUDIO</dt>
* <dd>Favor faithfulness to the original input.</dd>
* <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
* <dd>Configure the minimum possible coding delay by disabling certain modes
* of operation.</dd>
* </dl>
* @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
* on failure.
*/
OPUS_EXPORT int opus_projection_ambisonics_encoder_init(
OpusProjectionEncoder *st,
opus_int32 Fs,
int channels,
int mapping_family,
int *streams,
int *coupled_streams,
int application
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6);
/** Encodes a projection Opus frame.
* @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
* @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved
* samples.
* This must contain
* <code>frame_size*channels</code>
* samples.
* @param frame_size <tt>int</tt>: Number of samples per channel in the input
* signal.
* This must be an Opus frame size for the
* encoder's sampling rate.
* For example, at 48 kHz the permitted values
* are 120, 240, 480, 960, 1920, and 2880.
* Passing in a duration of less than 10 ms
* (480 samples at 48 kHz) will prevent the
* encoder from using the LPC or hybrid modes.
* @param[out] data <tt>unsigned char*</tt>: Output payload.
* This must contain storage for at
* least \a max_data_bytes.
* @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
* memory for the output
* payload. This may be
* used to impose an upper limit on
* the instant bitrate, but should
* not be used as the only bitrate
* control. Use #OPUS_SET_BITRATE to
* control the bitrate.
* @returns The length of the encoded packet (in bytes) on success or a
* negative error code (see @ref opus_errorcodes) on failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode(
OpusProjectionEncoder *st,
const opus_int16 *pcm,
int frame_size,
unsigned char *data,
opus_int32 max_data_bytes
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
/** Encodes a projection Opus frame from floating point input.
* @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
* @param[in] pcm <tt>const float*</tt>: The input signal as interleaved
* samples with a normal range of
* +/-1.0.
* Samples with a range beyond +/-1.0
* are supported but will be clipped by
* decoders using the integer API and
* should only be used if it is known
* that the far end supports extended
* dynamic range.
* This must contain
* <code>frame_size*channels</code>
* samples.
* @param frame_size <tt>int</tt>: Number of samples per channel in the input
* signal.
* This must be an Opus frame size for the
* encoder's sampling rate.
* For example, at 48 kHz the permitted values
* are 120, 240, 480, 960, 1920, and 2880.
* Passing in a duration of less than 10 ms
* (480 samples at 48 kHz) will prevent the
* encoder from using the LPC or hybrid modes.
* @param[out] data <tt>unsigned char*</tt>: Output payload.
* This must contain storage for at
* least \a max_data_bytes.
* @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
* memory for the output
* payload. This may be
* used to impose an upper limit on
* the instant bitrate, but should
* not be used as the only bitrate
* control. Use #OPUS_SET_BITRATE to
* control the bitrate.
* @returns The length of the encoded packet (in bytes) on success or a
* negative error code (see @ref opus_errorcodes) on failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode_float(
OpusProjectionEncoder *st,
const float *pcm,
int frame_size,
unsigned char *data,
opus_int32 max_data_bytes
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
/** Frees an <code>OpusProjectionEncoder</code> allocated by
* opus_projection_ambisonics_encoder_create().
* @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to be freed.
*/
OPUS_EXPORT void opus_projection_encoder_destroy(OpusProjectionEncoder *st);
/** Perform a CTL function on a projection Opus encoder.
*
* Generally the request and subsequent arguments are generated by a
* convenience macro.
* @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
* @param request This and all remaining parameters should be replaced by one
* of the convenience macros in @ref opus_genericctls,
* @ref opus_encoderctls, @ref opus_multistream_ctls, or
* @ref opus_projection_ctls
* @see opus_genericctls
* @see opus_encoderctls
* @see opus_multistream_ctls
* @see opus_projection_ctls
*/
OPUS_EXPORT int opus_projection_encoder_ctl(OpusProjectionEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
/**@}*/
/**\name Projection decoder functions */
/**@{*/
/** Gets the size of an <code>OpusProjectionDecoder</code> structure.
* @param channels <tt>int</tt>: The total number of output channels.
* This must be no more than 255.
* @param streams <tt>int</tt>: The total number of streams coded in the
* input.
* This must be no more than 255.
* @param coupled_streams <tt>int</tt>: Number streams to decode as coupled
* (2 channel) streams.
* This must be no larger than the total
* number of streams.
* Additionally, The total number of
* coded channels (<code>streams +
* coupled_streams</code>) must be no
* more than 255.
* @returns The size in bytes on success, or a negative error code
* (see @ref opus_errorcodes) on error.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_decoder_get_size(
int channels,
int streams,
int coupled_streams
);
/** Allocates and initializes a projection decoder state.
* Call opus_projection_decoder_destroy() to release
* this object when finished.
* @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
* This must be one of 8000, 12000, 16000,
* 24000, or 48000.
* @param channels <tt>int</tt>: Number of channels to output.
* This must be at most 255.
* It may be different from the number of coded
* channels (<code>streams +
* coupled_streams</code>).
* @param streams <tt>int</tt>: The total number of streams coded in the
* input.
* This must be no more than 255.
* @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
* (2 channel) streams.
* This must be no larger than the total
* number of streams.
* Additionally, The total number of
* coded channels (<code>streams +
* coupled_streams</code>) must be no
* more than 255.
* @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix
* that mapping from coded channels to output channels,
* as described in @ref opus_projection and
* @ref opus_projection_ctls.
* @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the
* demixing matrix, as
* described in @ref
* opus_projection_ctls.
* @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
* code (see @ref opus_errorcodes) on
* failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionDecoder *opus_projection_decoder_create(
opus_int32 Fs,
int channels,
int streams,
int coupled_streams,
unsigned char *demixing_matrix,
opus_int32 demixing_matrix_size,
int *error
) OPUS_ARG_NONNULL(5);
/** Intialize a previously allocated projection decoder state object.
* The memory pointed to by \a st must be at least the size returned by
* opus_projection_decoder_get_size().
* This is intended for applications which use their own allocator instead of
* malloc.
* To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
* @see opus_projection_decoder_create
* @see opus_projection_deocder_get_size
* @param st <tt>OpusProjectionDecoder*</tt>: Projection encoder state to initialize.
* @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
* This must be one of 8000, 12000, 16000,
* 24000, or 48000.
* @param channels <tt>int</tt>: Number of channels to output.
* This must be at most 255.
* It may be different from the number of coded
* channels (<code>streams +
* coupled_streams</code>).
* @param streams <tt>int</tt>: The total number of streams coded in the
* input.
* This must be no more than 255.
* @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
* (2 channel) streams.
* This must be no larger than the total
* number of streams.
* Additionally, The total number of
* coded channels (<code>streams +
* coupled_streams</code>) must be no
* more than 255.
* @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix
* that mapping from coded channels to output channels,
* as described in @ref opus_projection and
* @ref opus_projection_ctls.
* @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the
* demixing matrix, as
* described in @ref
* opus_projection_ctls.
* @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
* on failure.
*/
OPUS_EXPORT int opus_projection_decoder_init(
OpusProjectionDecoder *st,
opus_int32 Fs,
int channels,
int streams,
int coupled_streams,
unsigned char *demixing_matrix,
opus_int32 demixing_matrix_size
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
/** Decode a projection Opus packet.
* @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
* @param[in] data <tt>const unsigned char*</tt>: Input payload.
* Use a <code>NULL</code>
* pointer to indicate packet
* loss.
* @param len <tt>opus_int32</tt>: Number of bytes in payload.
* @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
* samples.
* This must contain room for
* <code>frame_size*channels</code>
* samples.
* @param frame_size <tt>int</tt>: The number of samples per channel of
* available space in \a pcm.
* If this is less than the maximum packet duration
* (120 ms; 5760 for 48kHz), this function will not be capable
* of decoding some packets. In the case of PLC (data==NULL)
* or FEC (decode_fec=1), then frame_size needs to be exactly
* the duration of audio that is missing, otherwise the
* decoder will not be in the optimal state to decode the
* next incoming packet. For the PLC and FEC cases, frame_size
* <b>must</b> be a multiple of 2.5 ms.
* @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
* forward error correction data be decoded.
* If no such data is available, the frame is
* decoded as if it were lost.
* @returns Number of samples decoded on success or a negative error code
* (see @ref opus_errorcodes) on failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode(
OpusProjectionDecoder *st,
const unsigned char *data,
opus_int32 len,
opus_int16 *pcm,
int frame_size,
int decode_fec
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
/** Decode a projection Opus packet with floating point output.
* @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
* @param[in] data <tt>const unsigned char*</tt>: Input payload.
* Use a <code>NULL</code>
* pointer to indicate packet
* loss.
* @param len <tt>opus_int32</tt>: Number of bytes in payload.
* @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
* samples.
* This must contain room for
* <code>frame_size*channels</code>
* samples.
* @param frame_size <tt>int</tt>: The number of samples per channel of
* available space in \a pcm.
* If this is less than the maximum packet duration
* (120 ms; 5760 for 48kHz), this function will not be capable
* of decoding some packets. In the case of PLC (data==NULL)
* or FEC (decode_fec=1), then frame_size needs to be exactly
* the duration of audio that is missing, otherwise the
* decoder will not be in the optimal state to decode the
* next incoming packet. For the PLC and FEC cases, frame_size
* <b>must</b> be a multiple of 2.5 ms.
* @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
* forward error correction data be decoded.
* If no such data is available, the frame is
* decoded as if it were lost.
* @returns Number of samples decoded on success or a negative error code
* (see @ref opus_errorcodes) on failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode_float(
OpusProjectionDecoder *st,
const unsigned char *data,
opus_int32 len,
float *pcm,
int frame_size,
int decode_fec
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
/** Perform a CTL function on a projection Opus decoder.
*
* Generally the request and subsequent arguments are generated by a
* convenience macro.
* @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
* @param request This and all remaining parameters should be replaced by one
* of the convenience macros in @ref opus_genericctls,
* @ref opus_decoderctls, @ref opus_multistream_ctls, or
* @ref opus_projection_ctls.
* @see opus_genericctls
* @see opus_decoderctls
* @see opus_multistream_ctls
* @see opus_projection_ctls
*/
OPUS_EXPORT int opus_projection_decoder_ctl(OpusProjectionDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
/** Frees an <code>OpusProjectionDecoder</code> allocated by
* opus_projection_decoder_create().
* @param st <tt>OpusProjectionDecoder</tt>: Projection decoder state to be freed.
*/
OPUS_EXPORT void opus_projection_decoder_destroy(OpusProjectionDecoder *st);
/**@}*/
/**@}*/
#ifdef __cplusplus
}
#endif
#endif /* OPUS_PROJECTION_H */
+18 -11
View File
@@ -33,14 +33,29 @@
#ifndef OPUS_TYPES_H
#define OPUS_TYPES_H
/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
#include <stdint.h>
#define opus_int int /* used for counters etc; at least 16 bits */
#define opus_int64 long long
#define opus_int8 signed char
#define opus_uint unsigned int /* used for counters etc; at least 16 bits */
#define opus_uint64 unsigned long long
#define opus_uint8 unsigned char
/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
#if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
#include <stdint.h>
# undef opus_int64
# undef opus_int8
# undef opus_uint64
# undef opus_uint8
typedef int8_t opus_int8;
typedef uint8_t opus_uint8;
typedef int16_t opus_int16;
typedef uint16_t opus_uint16;
typedef int32_t opus_int32;
typedef uint32_t opus_uint32;
typedef int64_t opus_int64;
typedef uint64_t opus_uint64;
#elif defined(_WIN32)
# if defined(__CYGWIN__)
@@ -148,12 +163,4 @@
#endif
#define opus_int int /* used for counters etc; at least 16 bits */
#define opus_int64 long long
#define opus_int8 signed char
#define opus_uint unsigned int /* used for counters etc; at least 16 bits */
#define opus_uint64 unsigned long long
#define opus_uint8 unsigned char
#endif /* OPUS_TYPES_H */
+38 -10
View File
@@ -1,5 +1,4 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
@@ -20,8 +19,9 @@ ALLOW_COMPILER_WARNINGS = True
FINAL_LIBRARY = 'gkmedias'
DEFINES['OPUS_BUILD'] = True
DEFINES['OPUS_VERSION'] = '"v1.1.4-mozilla"'
DEFINES['OPUS_VERSION'] = '2654707e86cc94413998976d179b2ab4a2aa3114'
DEFINES['USE_ALLOCA'] = True
DEFINES['ENABLE_HARDENING'] = True
# Don't export symbols
DEFINES['OPUS_EXPORT'] = ''
@@ -62,6 +62,8 @@ LOCAL_INCLUDES += [
'celt',
'include',
'silk',
'silk/fixed',
'silk/float',
'src',
]
@@ -75,18 +77,32 @@ UNIFIED_SOURCES += opus_sources
SOURCES += opus_nonunified_sources
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
LOCAL_INCLUDES += [
'silk/float',
]
UNIFIED_SOURCES += silk_sources_float
UNIFIED_SOURCES += opus_sources_float
else:
LOCAL_INCLUDES += [
'silk/fixed',
]
UNIFIED_SOURCES += silk_sources_fixed
# for webrtc
UNIFIED_SOURCES += opus_sources_float
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
DEFINES['OPUS_HAVE_RTCD'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE2'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE4_1'] = True
DEFINES['OPUS_X86_MAY_HAVE_AVX'] = True
SOURCES += celt_sources_sse
SOURCES += celt_sources_sse2
SOURCES += celt_sources_sse4_1
SOURCES += silk_sources_sse4_1
if not CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
SOURCES += silk_sources_fixed_sse4_1
for f in SOURCES:
if f in celt_sources_sse:
SOURCES[f].flags += CONFIG['SSE_FLAGS']
if f in celt_sources_sse2:
SOURCES[f].flags += CONFIG['SSE2_FLAGS']
if f in celt_sources_sse4_1 or \
f in silk_sources_sse4_1 or \
f in silk_sources_fixed_sse4_1:
SOURCES[f].flags += ['-msse4.1']
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
SOURCES += celt_sources_arm
@@ -108,6 +124,18 @@ if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
]
ASFLAGS += CONFIG['NEON_FLAGS']
if CONFIG['CPU_ARCH'] == 'aarch64' and CONFIG['CC_TYPE'] in ('clang', 'gcc'):
DEFINES['OPUS_ARM_PRESUME_AARCH64_NEON_INTR'] = True
DEFINES['OPUS_ARM_PRESUME_NEON'] = True
DEFINES['OPUS_ARM_PRESUME_NEON_INTR'] = True
SOURCES += celt_sources_arm_neon_intr
SOURCES += silk_sources_arm_neon_intr
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
DEFINES['OPUS_ARM_MAY_HAVE_NEON'] = True
DEFINES['OPUS_ARM_MAY_HAVE_NEON_INTR'] = True
else:
SOURCES += silk_sources_fixed_arm_neon_intr
# Suppress warnings in third-party code.
if CONFIG['GNU_CC']:
if CONFIG['CLANG_CXX']:
+2 -2
View File
@@ -27,10 +27,10 @@ index 8a39b9f..dfc2c62 100644
celt_sources_sse = [
'celt/x86/pitch_sse.c',
'celt/x86/x86_celt_map.c',
@@ -105,8 +112,6 @@ silk_sources = [
'silk/log2lin.c',
@@ -114,8 +114,6 @@ silk_sources = [
'silk/LP_variable_cutoff.c',
'silk/LPC_analysis_filter.c',
'silk/LPC_fit.c',
- 'silk/LPC_inv_pred_gain.c',
- 'silk/NLSF2A.c',
'silk/NLSF_decode.c',
+34
View File
@@ -0,0 +1,34 @@
diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -1,12 +1,11 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY gen-sources.py. DO NOT EDIT.
celt_sources = [
'celt/bands.c',
- 'celt/celt.c',
'celt/celt_lpc.c',
'celt/cwrs.c',
'celt/entcode.c',
'celt/entdec.c',
'celt/entenc.c',
'celt/kiss_fft.c',
'celt/laplace.c',
'celt/mathops.c',
@@ -14,16 +13,18 @@ celt_sources = [
'celt/modes.c',
'celt/pitch.c',
'celt/quant_bands.c',
'celt/rate.c',
'celt/vq.c',
]
opus_nonunified_sources = [
+ # Disabled because of undefined reference to celt_fatal at link time
+ 'celt/celt.c',
# Disabled because of name clash of opus_custom_encoder_get_size.
'celt/celt_decoder.c',
'celt/celt_encoder.c',
# Disabled for (safe) warning about QA redefinition.
'silk/LPC_inv_pred_gain.c',
'silk/NLSF2A.c',
]
+4 -4
View File
@@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* Number of binary divisions, when not in low complexity mode */
#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
#define MAX_ITERATIONS_A2NLSF_FIX 30
#define MAX_ITERATIONS_A2NLSF_FIX 16
/* Helper function for A2NLSF(..) */
/* Transforms polynomials from cos(n*f) to cos(f)^n */
@@ -130,7 +130,7 @@ void silk_A2NLSF(
const opus_int d /* I Filter order (must be even) */
)
{
opus_int i, k, m, dd, root_ix, ffrac;
opus_int i, k, m, dd, root_ix, ffrac;
opus_int32 xlo, xhi, xmid;
opus_int32 ylo, yhi, ymid, thr;
opus_int32 nom, den;
@@ -239,13 +239,13 @@ void silk_A2NLSF(
/* Set NLSFs to white spectrum and exit */
NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
for( k = 1; k < d; k++ ) {
NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
NLSF[ k ] = (opus_int16)silk_ADD16( NLSF[ k-1 ], NLSF[ 0 ] );
}
return;
}
/* Error: Apply progressively more bandwidth expansion and run again */
silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
silk_bwexpander_32( a_Q16, d, 65536 - silk_LSHIFT( 1, i ) );
silk_A2NLSF_init( a_Q16, P, Q, dd );
p = P; /* Pointer to polynomial */
+2 -1
View File
@@ -80,7 +80,8 @@ opus_int silk_Encode( /* O Returns error co
opus_int nSamplesIn, /* I Number of samples in input vector */
ec_enc *psRangeEnc, /* I/O Compressor data structure */
opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
int activity /* I Decision of Opus voice activity detector */
);
/****************************************/
+10 -6
View File
@@ -118,6 +118,10 @@ void silk_CNG(
/* Smooth gains */
for( i = 0; i < psDec->nb_subfr; i++ ) {
psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
/* If the smoothed gain is 3 dB greater than this subframe's gain, use this subframe's gain to adapt faster. */
if( silk_SMULWW( psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_THRESHOLD_Q16 ) > psDecCtrl->Gains_Q16[ i ] ) {
psCNG->CNG_smth_Gain_Q16 = psDecCtrl->Gains_Q16[ i ];
}
}
}
@@ -138,16 +142,16 @@ void silk_CNG(
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
}
gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order, psDec->arch );
/* Generate CNG signal, by synthesis filtering */
silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
celt_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
for( i = 0; i < length; i++ ) {
silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
@@ -170,11 +174,11 @@ void silk_CNG(
}
/* Update states */
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( CNG_sig_Q14[ MAX_LPC_ORDER + i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
/* Scale with Gain and add to input signal */
frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
}
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else {
+14 -11
View File
@@ -39,6 +39,13 @@ POSSIBILITY OF SUCH DAMAGE.
/* first d output samples are set to zero */
/*******************************************/
/* OPT: Using celt_fir() for this function should be faster, but it may cause
integer overflows in intermediate values (not final results), which the
current implementation silences by casting to unsigned. Enabling
this should be safe in pretty much all cases, even though it is not technically
C89-compliant. */
#define USE_CELT_FIR 0
void silk_LPC_analysis_filter(
opus_int16 *out, /* O Output signal */
const opus_int16 *in, /* I Input signal */
@@ -49,8 +56,7 @@ void silk_LPC_analysis_filter(
)
{
opus_int j;
#ifdef FIXED_POINT
opus_int16 mem[SILK_MAX_ORDER_LPC];
#if defined(FIXED_POINT) && USE_CELT_FIR
opus_int16 num[SILK_MAX_ORDER_LPC];
#else
int ix;
@@ -58,19 +64,16 @@ void silk_LPC_analysis_filter(
const opus_int16 *in_ptr;
#endif
silk_assert( d >= 6 );
silk_assert( (d & 1) == 0 );
silk_assert( d <= len );
celt_assert( d >= 6 );
celt_assert( (d & 1) == 0 );
celt_assert( d <= len );
#ifdef FIXED_POINT
silk_assert( d <= SILK_MAX_ORDER_LPC );
#if defined(FIXED_POINT) && USE_CELT_FIR
celt_assert( d <= SILK_MAX_ORDER_LPC );
for ( j = 0; j < d; j++ ) {
num[ j ] = -B[ j ];
}
for (j=0;j<d;j++) {
mem[ j ] = in[ d - j - 1 ];
}
celt_fir( in + d, num, out + d, len - d, d, mem, arch );
celt_fir( in + d, num, out + d, len - d, d, arch );
for ( j = 0; j < d; j++ ) {
out[ j ] = 0;
}
+82
View File
@@ -0,0 +1,82 @@
/***********************************************************************
Copyright (c) 2013, Koen Vos. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "SigProc_FIX.h"
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around.
This logic is reused in _celt_lpc(). Any bug fixes should also be applied there. */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output signal */
opus_int32 *a_QIN, /* I/O Input signal */
const opus_int QOUT, /* I Input Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
)
{
opus_int i, k, idx = 0;
opus_int32 maxabs, absval, chirp_Q16;
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a_QIN[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QIN - QOUT );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
chirp_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a_QIN, d, chirp_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT ) );
a_QIN[ k ] = silk_LSHIFT( (opus_int32)a_QOUT[ k ], QIN - QOUT );
}
} else {
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT );
}
}
}
+42 -55
View File
@@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "SigProc_FIX.h"
#include "define.h"
#define QA 24
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
@@ -38,117 +39,103 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
static opus_int32 LPC_inverse_pred_gain_QA_c( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
const opus_int order /* I Prediction order */
)
{
opus_int k, n, mult2Q;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
opus_int32 *Aold_QA, *Anew_QA;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
Anew_QA = A_QA[ order & 1 ];
invGain_Q30 = (opus_int32)1 << 30;
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
for( k = order - 1; k > 0; k-- ) {
/* Check for stability */
if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update inverse gain */
/* invGain_Q30 range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
/* Swap pointers */
Aold_QA = Anew_QA;
Anew_QA = A_QA[ k & 1 ];
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update AR coefficient */
for( n = 0; n < k; n++ ) {
tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
for( n = 0; n < (k + 1) >> 1; n++ ) {
opus_int64 tmp64;
tmp1 = A_QA[ n ];
tmp2 = A_QA[ k - n - 1 ];
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ n ] = ( opus_int32 )tmp64;
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
}
}
/* Check for stability */
if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
/* Range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
/* Update inverse gain */
/* Range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= 1<<30 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
return invGain_Q30;
}
/* For input in Q12 domain */
opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
opus_int32 DC_resp = 0;
Anew_QA = Atmp_QA[ order & 1 ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
DC_resp += (opus_int32)A_Q12[ k ];
Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
Atmp_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
}
/* If the DC is unstable, we don't even need to do the full calculations */
if( DC_resp >= 4096 ) {
return 0;
}
return LPC_inverse_pred_gain_QA( Atmp_QA, order );
return LPC_inverse_pred_gain_QA_c( Atmp_QA, order );
}
#ifdef FIXED_POINT
/* For input in Q24 domain */
opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int32 *A_Q24, /* I Prediction coefficients [order] */
const opus_int order /* I Prediction order */
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
Anew_QA = Atmp_QA[ order & 1 ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
}
return LPC_inverse_pred_gain_QA( Atmp_QA, order );
}
#endif
+1 -1
View File
@@ -130,6 +130,6 @@ void silk_LP_variable_cutoff(
/* ARMA low-pass filtering */
silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
silk_biquad_alt_stride1( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length);
}
}
+2 -10
View File
@@ -27,9 +27,9 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef SIGPROCFIX_API_MACROCOUNT_H
#define SIGPROCFIX_API_MACROCOUNT_H
#include <stdio.h>
#ifdef silk_MACRO_COUNT
#include <stdio.h>
#define varDefine opus_int64 ops_count = 0;
extern opus_int64 ops_count;
@@ -319,14 +319,6 @@ static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
return(tmp);
}
#undef silk_ADD_POS_SAT64
static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
opus_int64 tmp;
ops_count += 1;
tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
return(tmp);
}
#undef silk_LSHIFT8
static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
opus_int8 ret;
@@ -699,7 +691,7 @@ return(ret);
#undef silk_LIMIT_32
static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
static OPUS_INLINE opus_int32 silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
{
opus_int32 ret;
ops_count += 6;
+1 -2
View File
@@ -539,8 +539,7 @@ static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, cha
no checking needed for silk_POS_SAT32
no checking needed for silk_ADD_POS_SAT8
no checking needed for silk_ADD_POS_SAT16
no checking needed for silk_ADD_POS_SAT32
no checking needed for silk_ADD_POS_SAT64 */
no checking needed for silk_ADD_POS_SAT32 */
#undef silk_LSHIFT8
#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
+11 -48
View File
@@ -66,7 +66,8 @@ static OPUS_INLINE void silk_NLSF2A_find_poly(
void silk_NLSF2A(
opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
const opus_int d /* I filter order (should be even) */
const opus_int d, /* I filter order (should be even) */
int arch /* I Run-time architecture */
)
{
/* This ordering was found to maximize quality. It improves numerical accuracy of
@@ -83,15 +84,14 @@ void silk_NLSF2A(
opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
opus_int32 maxabs, absval, idx=0, sc_Q16;
silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
silk_assert( d==10||d==16 );
celt_assert( d==10 || d==16 );
/* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
ordering = d == 16 ? ordering16 : ordering10;
for( k = 0; k < d; k++ ) {
silk_assert(NLSF[k] >= 0 );
silk_assert( NLSF[k] >= 0 );
/* f_int on a scale 0-127 (rounded down) */
f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
@@ -126,52 +126,15 @@ void silk_NLSF2A(
a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */
}
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a32_QA1[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */
/* Convert int32 coefficients to Q12 int16 coefs */
silk_LPC_fit( a_Q12, a32_QA1, 12, QA + 1, d );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a32_QA1, d, sc_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( i = 0; silk_LPC_inverse_pred_gain( a_Q12, d, arch ) == 0 && i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */
a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 );
}
} else {
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
} else {
break;
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
}
+26 -18
View File
@@ -33,36 +33,44 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
void silk_NLSF_VQ(
opus_int32 err_Q26[], /* O Quantization errors [K] */
opus_int32 err_Q24[], /* O Quantization errors [K] */
const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */
const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */
const opus_int16 pWght_Q9[], /* I Codebook weights [K*LPC_order] */
const opus_int K, /* I Number of codebook vectors */
const opus_int LPC_order /* I Number of LPCs */
)
{
opus_int i, m;
opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26;
opus_int i, m;
opus_int32 diff_Q15, diffw_Q24, sum_error_Q24, pred_Q24;
const opus_int16 *w_Q9_ptr;
const opus_uint8 *cb_Q8_ptr;
silk_assert( LPC_order <= 16 );
silk_assert( ( LPC_order & 1 ) == 0 );
celt_assert( ( LPC_order & 1 ) == 0 );
/* Loop over codebook */
cb_Q8_ptr = pCB_Q8;
w_Q9_ptr = pWght_Q9;
for( i = 0; i < K; i++ ) {
sum_error_Q26 = 0;
for( m = 0; m < LPC_order; m += 2 ) {
/* Compute weighted squared quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 );
sum_error_Q24 = 0;
pred_Q24 = 0;
for( m = LPC_order-2; m >= 0; m -= 2 ) {
/* Compute weighted absolute predictive quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m + 1 ], (opus_int32)cb_Q8_ptr[ m + 1 ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m + 1 ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
/* Compute weighted squared quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 );
/* Compute weighted absolute predictive quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)cb_Q8_ptr[ m ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 );
silk_assert( sum_error_Q26 >= 0 );
silk_assert( sum_error_Q30 >= 0 );
silk_assert( sum_error_Q24 >= 0 );
}
err_Q26[ i ] = sum_error_Q26;
err_Q24[ i ] = sum_error_Q24;
cb_Q8_ptr += LPC_order;
w_Q9_ptr += LPC_order;
}
}
+2 -2
View File
@@ -48,8 +48,8 @@ void silk_NLSF_VQ_weights_laroia(
opus_int k;
opus_int32 tmp1_int, tmp2_int;
silk_assert( D > 0 );
silk_assert( ( D & 1 ) == 0 );
celt_assert( D > 0 );
celt_assert( ( D & 1 ) == 0 );
/* First value */
tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 );
+7 -15
View File
@@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
/* Predictive dequantizer for NLSF residuals */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
opus_int16 x_Q10[], /* O Output [ order ] */
const opus_int8 indices[], /* I Quantization indices [ order ] */
const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */
@@ -70,15 +70,9 @@ void silk_NLSF_decode(
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int32 W_tmp_Q9, NLSF_Q15_tmp;
opus_int32 NLSF_Q15_tmp;
const opus_uint8 *pCB_element;
/* Decode first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 );
}
const opus_int16 *pCB_Wght_Q9;
/* Unpack entropy table indices and predictor for current CB1 index */
silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
@@ -86,13 +80,11 @@ void silk_NLSF_decode(
/* Predictive residual dequantizer */
silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order );
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order );
/* Apply inverse square-rooted weights and add to output */
/* Apply inverse square-rooted weights to first stage and add to output */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) );
NLSF_Q15_tmp = silk_ADD_LSHIFT32( silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), pCB_Wght_Q9[ i ] ), (opus_int16)pCB_element[ i ], 7 );
pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 );
}
+3 -5
View File
@@ -84,7 +84,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
nStates = 1;
RD_Q25[ 0 ] = 0;
prev_out_Q10[ 0 ] = 0;
for( i = order - 1; ; i-- ) {
for( i = order - 1; i >= 0; i-- ) {
rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
in_Q10 = x_Q10[ i ];
for( j = 0; j < nStates; j++ ) {
@@ -131,7 +131,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
}
if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
if( nStates <= NLSF_QUANT_DEL_DEC_STATES/2 ) {
/* double number of states and copy */
for( j = 0; j < nStates; j++ ) {
ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
@@ -140,7 +140,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] = ind[ j - nStates ][ i ];
}
} else if( i > 0 ) {
} else {
/* sort lower and upper half of RD_Q25, pairwise */
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
@@ -191,8 +191,6 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
}
} else { /* i == 0 */
break;
}
}
+12 -25
View File
@@ -37,9 +37,9 @@ POSSIBILITY OF SUCH DAMAGE.
/***********************/
opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */
opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */
opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */
opus_int16 *pNLSF_Q15, /* I/O (Un)quantized NLSF vector [ LPC_ORDER ] */
const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int16 *pW_Q2, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */
const opus_int nSurvivors, /* I Max survivors after first stage */
const opus_int signalType /* I Signal type: 0/1/2 */
@@ -47,34 +47,32 @@ opus_int32 silk_NLSF_encode( /* O Returns
{
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9, ret;
VARDECL( opus_int32, err_Q26 );
VARDECL( opus_int32, err_Q24 );
VARDECL( opus_int32, RD_Q25 );
VARDECL( opus_int, tempIndices1 );
VARDECL( opus_int8, tempIndices2 );
opus_int16 res_Q15[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int16 W_adj_Q5[ MAX_LPC_ORDER ];
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
const opus_uint8 *pCB_element, *iCDF_ptr;
const opus_int16 *pCB_Wght_Q9;
SAVE_STACK;
silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
silk_assert( signalType >= 0 && signalType <= 2 );
celt_assert( signalType >= 0 && signalType <= 2 );
silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
/* NLSF stabilization */
silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
/* First stage: VQ */
ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
ALLOC( err_Q24, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q24, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->CB1_Wght_Q9, psNLSF_CB->nVectors, psNLSF_CB->order );
/* Sort the quantization errors */
ALLOC( tempIndices1, nSurvivors, opus_int );
silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
silk_insertion_sort_increasing( err_Q24, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
ALLOC( RD_Q25, nSurvivors, opus_int32 );
ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
@@ -85,23 +83,12 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Residual after first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ ind1 * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
}
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
/* Apply square-rooted weights */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
}
/* Modify input weights accordingly */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
W_tmp_Q9 = pCB_Wght_Q9[ i ];
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ], W_tmp_Q9 ), 14 );
W_adj_Q5[ i ] = silk_DIV32_varQ( (opus_int32)pW_Q2[ i ], silk_SMULBB( W_tmp_Q9, W_tmp_Q9 ), 21 );
}
/* Unpack entropy table indices and predictor for current CB1 index */
+56 -48
View File
@@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@@ -75,21 +75,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
void silk_NSQ_c
(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
)
{
opus_int k, lag, start_idx, LSF_interpolation_flag;
@@ -117,8 +117,7 @@ void silk_NSQ_c
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
/* Set up pointers to start of sub frame */
@@ -128,7 +127,7 @@ void silk_NSQ_c
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@@ -144,7 +143,7 @@ void silk_NSQ_c
if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
/* Rewhiten with new A coefs */
start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
silk_assert( start_idx > 0 );
celt_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
@@ -154,13 +153,13 @@ void silk_NSQ_c
}
}
silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_nsq_scale_states( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@@ -169,15 +168,14 @@ void silk_NSQ_c
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech and noise shaping signals */
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
}
/***********************************/
/* silk_noise_shape_quantizer */
/***********************************/
/******************************/
/* silk_noise_shape_quantizer */
/******************************/
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE
@@ -249,22 +247,22 @@ void silk_noise_shape_quantizer(
}
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(&NSQ->sDiff_shp_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
silk_assert( lag > 0 || signalType != TYPE_VOICED );
celt_assert( lag > 0 || signalType != TYPE_VOICED );
/* Combine prediction and noise shaping signals */
tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */
tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */
if( lag > 0 ) {
/* Symmetric, packed FIR coefficients */
n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
n_LTP_Q13 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
shp_lag_ptr++;
@@ -279,14 +277,27 @@ void silk_noise_shape_quantizer(
r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */
/* Flip sign depending on dither */
if ( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
if( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
}
r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
@@ -337,7 +348,8 @@ void silk_noise_shape_quantizer(
/* Update states */
psLPC_Q14++;
*psLPC_Q14 = xq_Q14;
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 );
NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
@@ -356,7 +368,7 @@ void silk_noise_shape_quantizer(
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@@ -368,28 +380,18 @@ static OPUS_INLINE void silk_nsq_scale_states(
)
{
opus_int i, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@@ -403,7 +405,9 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@@ -417,6 +421,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@@ -425,5 +430,8 @@ static OPUS_INLINE void silk_nsq_scale_states(
for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}
+73 -56
View File
@@ -43,6 +43,7 @@ typedef struct {
opus_int32 Shape_Q14[ DECISION_DELAY ];
opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 Seed;
opus_int32 SeedInit;
opus_int32 RD_Q10;
@@ -53,6 +54,7 @@ typedef struct {
opus_int32 RD_Q10;
opus_int32 xq_Q14;
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 sLTP_shp_Q14;
opus_int32 LPC_exc_Q14;
} NSQ_sample_struct;
@@ -66,7 +68,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@@ -107,27 +109,27 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay, /* I */
int arch /* I */
);
void silk_NSQ_del_dec_c(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
)
{
opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
@@ -159,6 +161,7 @@ void silk_NSQ_del_dec_c(
psDD->SeedInit = psDD->Seed;
psDD->RD_Q10 = 0;
psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14;
psDD->Diff_Q14 = NSQ->sDiff_shp_Q14;
psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
@@ -186,8 +189,7 @@ void silk_NSQ_del_dec_c(
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
@@ -199,7 +201,7 @@ void silk_NSQ_del_dec_c(
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@@ -235,7 +237,8 @@ void silk_NSQ_del_dec_c(
psDD = &psDelDec[ Winner_ind ];
last_smple_idx = smpl_buf_idx + decisionDelay;
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
@@ -247,7 +250,7 @@ void silk_NSQ_del_dec_c(
/* Rewhiten with new A coefs */
start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
silk_assert( start_idx > 0 );
celt_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
@@ -257,7 +260,7 @@ void silk_NSQ_del_dec_c(
}
}
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k,
psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
@@ -265,7 +268,7 @@ void silk_NSQ_del_dec_c(
Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@@ -286,7 +289,9 @@ void silk_NSQ_del_dec_c(
last_smple_idx = smpl_buf_idx + decisionDelay;
Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
@@ -297,10 +302,10 @@ void silk_NSQ_del_dec_c(
/* Update states */
NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
NSQ->sDiff_shp_Q14 = psDD->Diff_Q14;
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech signal */
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
@@ -335,7 +340,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay, /* I */
int arch /* I */
)
@@ -356,7 +361,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
NSQ_sample_struct *psSS;
SAVE_STACK;
silk_assert( nStatesDelayedDecision > 0 );
celt_assert( nStatesDelayedDecision > 0 );
ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
@@ -389,8 +394,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Long-term shaping */
if( lag > 0 ) {
/* Symmetric, packed FIR coefficients */
n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */
shp_lag_ptr++;
} else {
@@ -414,9 +419,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
/* Output of lowpass section */
tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
psDD->sAR2_Q14[ 0 ] = tmp2;
@@ -446,9 +451,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Input minus prediction plus noise feedback */
/* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
tmp1 = silk_ADD_SAT32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */
tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */
tmp1 = silk_SUB_SAT32( tmp2, tmp1 ); /* Q13 */
tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
@@ -462,6 +467,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
@@ -515,8 +533,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 );
psSS[ 0 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 0 ].xq_Q14 = xq_Q14;
@@ -529,21 +548,22 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
exc_Q14 = -exc_Q14;
}
/* Add predictions */
LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 );
psSS[ 1 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 1 ].xq_Q14 = xq_Q14;
}
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY;
if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY;
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY;
/* Find winner */
RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
@@ -607,6 +627,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
psDD = &psDelDec[ k ];
psSS = &psSampleState[ k ][ 0 ];
psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
psDD->Diff_Q14 = psSS->Diff_Q14;
psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
@@ -631,7 +652,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@@ -645,29 +666,19 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
)
{
opus_int i, k, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
NSQ_del_dec_struct *psDD;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@@ -681,7 +692,9 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@@ -699,6 +712,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
/* Scale scalar states */
psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@@ -712,5 +726,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
}
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}
+3 -3
View File
@@ -275,7 +275,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Reduce random noise for unvoiced frames with high LPC gain */
opus_int32 invGain_Q30, down_scale_Q30;
invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order );
invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order, arch );
down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 );
down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 );
@@ -291,7 +291,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Rewhiten LTP state */
idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
silk_assert( idx > 0 );
celt_assert( idx > 0 );
silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch );
/* Scale LTP state */
inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
@@ -345,7 +345,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Copy LPC state */
silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
celt_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
for( i = 0; i < psDec->frame_length; i++ ) {
/* partly unrolled */
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+44 -18
View File
@@ -35,7 +35,7 @@ extern "C"
/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */
#define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */
#include <string.h> /* for memset(), memcpy(), memmove() */
#include "typedef.h"
@@ -47,6 +47,11 @@ extern "C"
#include "x86/SigProc_FIX_sse.h"
#endif
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/biquad_alt_arm.h"
#include "arm/LPC_inv_pred_gain_arm.h"
#endif
/********************************************************************/
/* SIGNAL PROCESSING FUNCTIONS */
/********************************************************************/
@@ -96,14 +101,22 @@ void silk_resampler_down2_3(
* slower than biquad() but uses more precise coefficients
* can handle (slowly) varying coefficients
*/
void silk_biquad_alt(
void silk_biquad_alt_stride1(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [2] */
opus_int16 *out, /* O output signal */
const opus_int32 len, /* I signal length (must be even) */
opus_int stride /* I Operate on interleaved signal if > 1 */
const opus_int32 len /* I signal length (must be even) */
);
void silk_biquad_alt_stride2_c(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
/* Variable order MA prediction error filter. */
@@ -132,17 +145,11 @@ void silk_bwexpander_32(
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
);
/* For input in Q24 domain */
opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int32 *A_Q24, /* I Prediction coefficients [order] */
const opus_int order /* I Prediction order */
);
/* Split signal in two decimated bands using first-order allpass filters */
void silk_ana_filt_bank_1(
const opus_int16 *in, /* I Input signal [N] */
@@ -152,6 +159,14 @@ void silk_ana_filt_bank_1(
const opus_int32 N /* I Number of input samples */
);
#if !defined(OVERRIDE_silk_biquad_alt_stride2)
#define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
#endif
#if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
#define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
#endif
/********************************************************************/
/* SCALAR FUNCTIONS */
/********************************************************************/
@@ -271,7 +286,17 @@ void silk_A2NLSF(
void silk_NLSF2A(
opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
const opus_int d /* I filter order (should be even) */
const opus_int d, /* I filter order (should be even) */
int arch /* I Run-time architecture */
);
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output signal */
opus_int32 *a_QIN, /* I/O Input signal */
const opus_int QOUT, /* I Input Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
);
void silk_insertion_sort_increasing(
@@ -356,7 +381,7 @@ opus_int32 silk_inner_prod_aligned_scale(
const opus_int len /* I vector lengths */
);
opus_int64 silk_inner_prod16_aligned_64_c(
opus_int64 silk_inner_prod16_c(
const opus_int16 *inVec1, /* I input vector 1 */
const opus_int16 *inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
@@ -471,8 +496,7 @@ static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
/* Add with saturation for positive input values */
#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
@@ -572,7 +596,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* Make sure to store the result as the seed for the next call (also in between */
/* frames), otherwise result won't be random at all. When only using some of the */
/* bits, take the most significant bits by right-shifting. */
#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
#define RAND_MULTIPLIER 196314165
#define RAND_INCREMENT 907633515
#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
/* Add some multiplication functions that can be easily mapped to ARM. */
@@ -587,8 +613,8 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
#define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
#endif
#include "Inlines.h"
+10 -12
View File
@@ -101,9 +101,9 @@ opus_int silk_VAD_GetSA_Q8_c( /* O Return v
/* Safety checks */
silk_assert( VAD_N_BANDS == 4 );
silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
silk_assert( psEncC->frame_length <= 512 );
silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
celt_assert( psEncC->frame_length <= 512 );
celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
/***********************/
/* Filter and Decimate */
@@ -252,15 +252,14 @@ opus_int silk_VAD_GetSA_Q8_c( /* O Return v
speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
}
if( psEncC->frame_length == 20 * psEncC->fs_kHz ) {
speech_nrg = silk_RSHIFT32( speech_nrg, 1 );
}
/* Power scaling */
if( speech_nrg <= 0 ) {
SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
} else if( speech_nrg < 32768 ) {
if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
} else {
speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
}
} else if( speech_nrg < 16384 ) {
speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
/* square-root */
speech_nrg = silk_SQRT_APPROX( speech_nrg );
@@ -313,6 +312,8 @@ void silk_VAD_GetNoiseLevels(
/* Initially faster smoothing */
if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */
min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 );
/* Increment frame counter */
psSilk_VAD->counter++;
} else {
min_coef = 0;
}
@@ -356,7 +357,4 @@ void silk_VAD_GetNoiseLevels(
/* Store as part of state */
psSilk_VAD->NL[ k ] = nl;
}
/* Increment frame counter */
psSilk_VAD->counter++;
}
+65 -54
View File
@@ -34,84 +34,95 @@ POSSIBILITY OF SUCH DAMAGE.
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC_c(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
opus_int32 *res_nrg_Q15, /* O best residual energy */
opus_int32 *rate_dist_Q8, /* O best total bitrate */
opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
const opus_int16 *in_Q14, /* I input vector to be quantized */
const opus_int32 *W_Q18, /* I weighting matrix */
const opus_int32 *XX_Q17, /* I correlation matrix */
const opus_int32 *xX_Q17, /* I correlation vector */
const opus_int8 *cb_Q7, /* I codebook */
const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */
const opus_int subfr_len, /* I number of samples per subframe */
const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
opus_int L /* I number of vectors in codebook */
const opus_int L /* I number of vectors in codebook */
)
{
opus_int k, gain_tmp_Q7;
const opus_int8 *cb_row_Q7;
opus_int16 diff_Q14[ 5 ];
opus_int32 sum1_Q14, sum2_Q16;
opus_int32 neg_xX_Q24[ 5 ];
opus_int32 sum1_Q15, sum2_Q24;
opus_int32 bits_res_Q8, bits_tot_Q8;
/* Negate and convert to new Q domain */
neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 );
neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 );
neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 );
neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );
/* Loop over codebook */
*rate_dist_Q14 = silk_int32_MAX;
*rate_dist_Q8 = silk_int32_MAX;
*res_nrg_Q15 = silk_int32_MAX;
cb_row_Q7 = cb_Q7;
/* If things go really bad, at least *ind is set to something safe. */
*ind = 0;
for( k = 0; k < L; k++ ) {
opus_int32 penalty;
gain_tmp_Q7 = cb_gain_Q7[k];
diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
/* Weighted rate */
sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
/* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */
sum1_Q15 = SILK_FIX_CONST( 1.001, 15 );
/* Penalty for too large gain */
sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 );
silk_assert( sum1_Q14 >= 0 );
/* first row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[ 1 ], cb_row_Q7[ 1 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 2 ], cb_row_Q7[ 2 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 3 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 4 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 0 ], cb_row_Q7[ 0 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 0 ] );
/* first row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );
/* second row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 8 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 9 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 6 ], cb_row_Q7[ 1 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 1 ] );
/* second row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );
/* third row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 14 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 12 ], cb_row_Q7[ 2 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 2 ] );
/* third row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] );
/* fourth row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 18 ], cb_row_Q7[ 3 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 3 ] );
/* fourth row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] );
/* last row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );
silk_assert( sum1_Q14 >= 0 );
/* last row of XX_Q17 */
sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 24 ], cb_row_Q7[ 4 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 4 ] );
/* find best */
if( sum1_Q14 < *rate_dist_Q14 ) {
*rate_dist_Q14 = sum1_Q14;
*ind = (opus_int8)k;
*gain_Q7 = gain_tmp_Q7;
if( sum1_Q15 >= 0 ) {
/* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */
bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) );
/* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */
bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 );
if( bits_tot_Q8 <= *rate_dist_Q8 ) {
*rate_dist_Q8 = bits_tot_Q8;
*res_nrg_Q15 = sum1_Q15 + penalty;
*ind = (opus_int8)k;
*gain_Q7 = gain_tmp_Q7;
}
}
/* Go to next cbk vector */
@@ -0,0 +1,57 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_LPC_INV_PRED_GAIN_ARM_H
# define SILK_LPC_INV_PRED_GAIN_ARM_H
# include "celt/arm/armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), PRESUME_NEON(silk_LPC_inverse_pred_gain)(A_Q12, order))
# endif
# endif
# if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK+1])(const opus_int16 *A_Q12, const opus_int order);
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((*SILK_LPC_INVERSE_PRED_GAIN_IMPL[(arch)&OPUS_ARCHMASK])(A_Q12, order))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_neon(A_Q12, order))
# endif
# endif
#endif /* end SILK_LPC_INV_PRED_GAIN_ARM_H */
@@ -0,0 +1,288 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "SigProc_FIX.h"
#include "define.h"
#define QA 24
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
/* The difficulty is how to judge a 64-bit signed integer tmp64 is 32-bit overflowed,
* since NEON has no 64-bit min, max or comparison instructions.
* A failed idea is to compare the results of vmovn(tmp64) and vqmovn(tmp64) whether they are equal or not.
* However, this idea fails when the tmp64 is something like 0xFFFFFFF980000000.
* Here we know that mult2Q >= 1, so the highest bit (bit 63, sign bit) of tmp64 must equal to bit 62.
* tmp64 was shifted left by 1 and we got tmp64'. If high_half(tmp64') != 0 and high_half(tmp64') != -1,
* then we know that bit 31 to bit 63 of tmp64 can not all be the sign bit, and therefore tmp64 is 32-bit overflowed.
* That is, we judge if tmp64' > 0x00000000FFFFFFFF, or tmp64' <= 0xFFFFFFFF00000000.
* We use narrowing shift right 31 bits to tmp32' to save data bandwidth and instructions.
* That is, we judge if tmp32' > 0x00000000, or tmp32' <= 0xFFFFFFFF.
*/
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
static OPUS_INLINE opus_int32 LPC_inverse_pred_gain_QA_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
const opus_int order /* I Prediction order */
)
{
opus_int k, n, mult2Q;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
opus_int32 max, min;
int32x4_t max_s32x4, min_s32x4;
int32x2_t max_s32x2, min_s32x2;
max_s32x4 = vdupq_n_s32( silk_int32_MIN );
min_s32x4 = vdupq_n_s32( silk_int32_MAX );
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
for( k = order - 1; k > 0; k-- ) {
int32x2_t rc_Q31_s32x2, rc_mult2_s32x2;
int64x2_t mult2Q_s64x2;
/* Check for stability */
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
/* Update inverse gain */
/* invGain_Q30 range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update AR coefficient */
rc_Q31_s32x2 = vdup_n_s32( rc_Q31 );
mult2Q_s64x2 = vdupq_n_s64( -mult2Q );
rc_mult2_s32x2 = vdup_n_s32( rc_mult2 );
for( n = 0; n < ( ( k + 1 ) >> 1 ) - 3; n += 4 ) {
/* We always calculate extra elements of A_QA buffer when ( k % 4 ) != 0, to take the advantage of SIMD parallelization. */
int32x4_t tmp1_s32x4, tmp2_s32x4, t0_s32x4, t1_s32x4, s0_s32x4, s1_s32x4, t_QA0_s32x4, t_QA1_s32x4;
int64x2_t t0_s64x2, t1_s64x2, t2_s64x2, t3_s64x2;
tmp1_s32x4 = vld1q_s32( A_QA + n );
tmp2_s32x4 = vld1q_s32( A_QA + k - n - 4 );
tmp2_s32x4 = vrev64q_s32( tmp2_s32x4 );
tmp2_s32x4 = vcombine_s32( vget_high_s32( tmp2_s32x4 ), vget_low_s32( tmp2_s32x4 ) );
t0_s32x4 = vqrdmulhq_lane_s32( tmp2_s32x4, rc_Q31_s32x2, 0 );
t1_s32x4 = vqrdmulhq_lane_s32( tmp1_s32x4, rc_Q31_s32x2, 0 );
t_QA0_s32x4 = vqsubq_s32( tmp1_s32x4, t0_s32x4 );
t_QA1_s32x4 = vqsubq_s32( tmp2_s32x4, t1_s32x4 );
t0_s64x2 = vmull_s32( vget_low_s32 ( t_QA0_s32x4 ), rc_mult2_s32x2 );
t1_s64x2 = vmull_s32( vget_high_s32( t_QA0_s32x4 ), rc_mult2_s32x2 );
t2_s64x2 = vmull_s32( vget_low_s32 ( t_QA1_s32x4 ), rc_mult2_s32x2 );
t3_s64x2 = vmull_s32( vget_high_s32( t_QA1_s32x4 ), rc_mult2_s32x2 );
t0_s64x2 = vrshlq_s64( t0_s64x2, mult2Q_s64x2 );
t1_s64x2 = vrshlq_s64( t1_s64x2, mult2Q_s64x2 );
t2_s64x2 = vrshlq_s64( t2_s64x2, mult2Q_s64x2 );
t3_s64x2 = vrshlq_s64( t3_s64x2, mult2Q_s64x2 );
t0_s32x4 = vcombine_s32( vmovn_s64( t0_s64x2 ), vmovn_s64( t1_s64x2 ) );
t1_s32x4 = vcombine_s32( vmovn_s64( t2_s64x2 ), vmovn_s64( t3_s64x2 ) );
s0_s32x4 = vcombine_s32( vshrn_n_s64( t0_s64x2, 31 ), vshrn_n_s64( t1_s64x2, 31 ) );
s1_s32x4 = vcombine_s32( vshrn_n_s64( t2_s64x2, 31 ), vshrn_n_s64( t3_s64x2, 31 ) );
max_s32x4 = vmaxq_s32( max_s32x4, s0_s32x4 );
min_s32x4 = vminq_s32( min_s32x4, s0_s32x4 );
max_s32x4 = vmaxq_s32( max_s32x4, s1_s32x4 );
min_s32x4 = vminq_s32( min_s32x4, s1_s32x4 );
t1_s32x4 = vrev64q_s32( t1_s32x4 );
t1_s32x4 = vcombine_s32( vget_high_s32( t1_s32x4 ), vget_low_s32( t1_s32x4 ) );
vst1q_s32( A_QA + n, t0_s32x4 );
vst1q_s32( A_QA + k - n - 4, t1_s32x4 );
}
for( ; n < (k + 1) >> 1; n++ ) {
opus_int64 tmp64;
tmp1 = A_QA[ n ];
tmp2 = A_QA[ k - n - 1 ];
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ n ] = ( opus_int32 )tmp64;
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
}
}
/* Check for stability */
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
max_s32x2 = vmax_s32( vget_low_s32( max_s32x4 ), vget_high_s32( max_s32x4 ) );
min_s32x2 = vmin_s32( vget_low_s32( min_s32x4 ), vget_high_s32( min_s32x4 ) );
max_s32x2 = vmax_s32( max_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( max_s32x2 ), 32 ) ) );
min_s32x2 = vmin_s32( min_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( min_s32x2 ), 32 ) ) );
max = vget_lane_s32( max_s32x2, 0 );
min = vget_lane_s32( min_s32x2, 0 );
if( ( max > 0 ) || ( min < -1 ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
/* Range: [ 1 : 2^30 ] */
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
/* Update inverse gain */
/* Range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
return invGain_Q30;
}
/* For input in Q12 domain */
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
)
{
#ifdef OPUS_CHECK_ASM
const opus_int32 invGain_Q30_c = silk_LPC_inverse_pred_gain_c( A_Q12, order );
#endif
opus_int32 invGain_Q30;
if( ( SILK_MAX_ORDER_LPC != 24 ) || ( order & 1 )) {
invGain_Q30 = silk_LPC_inverse_pred_gain_c( A_Q12, order );
}
else {
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
opus_int32 DC_resp;
int16x8_t t0_s16x8, t1_s16x8, t2_s16x8;
int32x4_t t0_s32x4;
const opus_int leftover = order & 7;
/* Increase Q domain of the AR coefficients */
t0_s16x8 = vld1q_s16( A_Q12 + 0 );
t1_s16x8 = vld1q_s16( A_Q12 + 8 );
if ( order > 16 ) {
t2_s16x8 = vld1q_s16( A_Q12 + 16 );
}
t0_s32x4 = vpaddlq_s16( t0_s16x8 );
switch( order - leftover )
{
case 24:
t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
/* FALLTHROUGH */
case 16:
t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
/* FALLTHROUGH */
case 8:
{
const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
}
break;
default:
DC_resp = 0;
break;
}
A_Q12 += order - leftover;
switch( leftover )
{
case 6:
DC_resp += (opus_int32)A_Q12[ 5 ];
DC_resp += (opus_int32)A_Q12[ 4 ];
Atmp_QA[ order - leftover + 5 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 5 ], QA - 12 );
Atmp_QA[ order - leftover + 4 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 4 ], QA - 12 );
/* FALLTHROUGH */
case 4:
DC_resp += (opus_int32)A_Q12[ 3 ];
DC_resp += (opus_int32)A_Q12[ 2 ];
Atmp_QA[ order - leftover + 3 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 3 ], QA - 12 );
Atmp_QA[ order - leftover + 2 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 2 ], QA - 12 );
/* FALLTHROUGH */
case 2:
DC_resp += (opus_int32)A_Q12[ 1 ];
DC_resp += (opus_int32)A_Q12[ 0 ];
Atmp_QA[ order - leftover + 1 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 1 ], QA - 12 );
Atmp_QA[ order - leftover + 0 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 0 ], QA - 12 );
/* FALLTHROUGH */
default:
break;
}
/* If the DC is unstable, we don't even need to do the full calculations */
if( DC_resp >= 4096 ) {
invGain_Q30 = 0;
} else {
invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order );
}
}
#ifdef OPUS_CHECK_ASM
silk_assert( invGain_Q30_c == invGain_Q30 );
#endif
return invGain_Q30;
}
+100
View File
@@ -0,0 +1,100 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_DEL_DEC_ARM_H
#define SILK_NSQ_DEL_DEC_ARM_H
#include "celt/arm/armcpu.h"
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_NSQ_del_dec_neon(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
PRESUME_NEON(silk_NSQ_del_dec)( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#endif
#endif
#if !defined(OVERRIDE_silk_NSQ_del_dec)
/*Is run-time CPU detection enabled on this platform?*/
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14))
#endif
#endif
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */
File diff suppressed because it is too large Load Diff
+17 -16
View File
@@ -28,30 +28,31 @@ POSSIBILITY OF SUCH DAMAGE.
#define SILK_NSQ_NEON_H
#include "cpu_support.h"
#include "SigProc_FIX.h"
#undef silk_short_prediction_create_arch_coef
/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */
static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
{
out[15] = in[0] << 15;
out[14] = in[1] << 15;
out[13] = in[2] << 15;
out[12] = in[3] << 15;
out[11] = in[4] << 15;
out[10] = in[5] << 15;
out[9] = in[6] << 15;
out[8] = in[7] << 15;
out[7] = in[8] << 15;
out[6] = in[9] << 15;
out[15] = silk_LSHIFT32(in[0], 15);
out[14] = silk_LSHIFT32(in[1], 15);
out[13] = silk_LSHIFT32(in[2], 15);
out[12] = silk_LSHIFT32(in[3], 15);
out[11] = silk_LSHIFT32(in[4], 15);
out[10] = silk_LSHIFT32(in[5], 15);
out[9] = silk_LSHIFT32(in[6], 15);
out[8] = silk_LSHIFT32(in[7], 15);
out[7] = silk_LSHIFT32(in[8], 15);
out[6] = silk_LSHIFT32(in[9], 15);
if (order == 16)
{
out[5] = in[10] << 15;
out[4] = in[11] << 15;
out[3] = in[12] << 15;
out[2] = in[13] << 15;
out[1] = in[14] << 15;
out[0] = in[15] << 15;
out[5] = silk_LSHIFT32(in[10], 15);
out[4] = silk_LSHIFT32(in[11], 15);
out[3] = silk_LSHIFT32(in[12], 15);
out[2] = silk_LSHIFT32(in[13], 15);
out[1] = silk_LSHIFT32(in[14], 15);
out[0] = silk_LSHIFT32(in[15], 15);
}
else
{
+68
View File
@@ -28,13 +28,62 @@ POSSIBILITY OF SUCH DAMAGE.
# include "config.h"
#endif
#include "main_FIX.h"
#include "NSQ.h"
#include "SigProc_FIX.h"
#if defined(OPUS_HAVE_RTCD)
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
) = {
silk_biquad_alt_stride2_c, /* ARMv4 */
silk_biquad_alt_stride2_c, /* EDSP */
silk_biquad_alt_stride2_c, /* Media */
silk_biquad_alt_stride2_neon, /* Neon */
};
opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
) = {
silk_LPC_inverse_pred_gain_c, /* ARMv4 */
silk_LPC_inverse_pred_gain_c, /* EDSP */
silk_LPC_inverse_pred_gain_c, /* Media */
silk_LPC_inverse_pred_gain_neon, /* Neon */
};
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
) = {
silk_NSQ_del_dec_c, /* ARMv4 */
silk_NSQ_del_dec_c, /* EDSP */
silk_NSQ_del_dec_c, /* Media */
silk_NSQ_del_dec_neon, /* Neon */
};
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
NEON version takes different parameters than the C version.
Instead RTCD is done via if statements at the call sites.
@@ -52,4 +101,23 @@ opus_int32
# endif
# if defined(FIXED_POINT) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *corr, /* O Result [order + 1] */
opus_int *scale, /* O Scaling of the correlation vector */
const opus_int16 *input, /* I Input data to correlate */
const opus_int warping_Q16, /* I Warping coefficient */
const opus_int length, /* I Length of input */
const opus_int order /* I Correlation order (even) */
) = {
silk_warped_autocorrelation_FIX_c, /* ARMv4 */
silk_warped_autocorrelation_FIX_c, /* EDSP */
silk_warped_autocorrelation_FIX_c, /* Media */
silk_warped_autocorrelation_FIX_neon, /* Neon */
};
# endif
#endif /* OPUS_HAVE_RTCD */
+68
View File
@@ -0,0 +1,68 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_BIQUAD_ALT_ARM_H
# define SILK_BIQUAD_ALT_ARM_H
# include "celt/arm/armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_biquad_alt_stride2_neon(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), PRESUME_NEON(silk_biquad_alt_stride2)(in, B_Q28, A_Q28, S, out, len))
# endif
# endif
# if !defined(OVERRIDE_silk_biquad_alt_stride2)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK+1])(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((*SILK_BIQUAD_ALT_STRIDE2_IMPL[(arch)&OPUS_ARCHMASK])(in, B_Q28, A_Q28, S, out, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_neon(in, B_Q28, A_Q28, S, out, len))
# endif
# endif
#endif /* end SILK_BIQUAD_ALT_ARM_H */
@@ -0,0 +1,156 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#ifdef OPUS_CHECK_ASM
# include <string.h>
# include "stack_alloc.h"
#endif
#include "SigProc_FIX.h"
static inline void silk_biquad_alt_stride2_kernel( const int32x4_t A_L_s32x4, const int32x4_t A_U_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t in_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2 )
{
int32x4_t t_s32x4, out32_Q14_s32x4;
*out32_Q14_s32x2 = vadd_s32( vget_low_s32( *S_s32x4 ), t_s32x2 ); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ) */
*S_s32x4 = vcombine_s32( vget_high_s32( *S_s32x4 ), vdup_n_s32( 0 ) ); /* S{0,1} = S{2,3}; S{2,3} = 0; */
*out32_Q14_s32x2 = vshl_n_s32( *out32_Q14_s32x2, 2 ); /* out32_Q14_{0,1} = silk_LSHIFT( silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ), 2 ); */
out32_Q14_s32x4 = vcombine_s32( *out32_Q14_s32x2, *out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} */
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_L_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_L_Q28 ) */
*S_s32x4 = vrsraq_n_s32( *S_s32x4, t_s32x4, 14 ); /* S{0,1} = S{2,3} + silk_RSHIFT_ROUND(); S{2,3} = silk_RSHIFT_ROUND(); */
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_U_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ) */
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S{0,1,2,3}, out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ); */
t_s32x4 = vqdmulhq_s32( in_s32x4, B_Q28_s32x4 ); /* silk_SMULWB( B_Q28[ {1,1,2,2} ], in{0,1,0,1} ) */
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S0, B_Q28[ {1,1,2,2} ], in{0,1,0,1} ); */
}
void silk_biquad_alt_stride2_neon(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
)
{
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
opus_int k = 0;
const int32x2_t offset_s32x2 = vdup_n_s32( (1<<14) - 1 );
const int32x4_t offset_s32x4 = vcombine_s32( offset_s32x2, offset_s32x2 );
int16x4_t in_s16x4 = vdup_n_s16( 0 );
int16x4_t out_s16x4;
int32x2_t A_Q28_s32x2, A_L_s32x2, A_U_s32x2, B_Q28_s32x2, t_s32x2;
int32x4_t A_L_s32x4, A_U_s32x4, B_Q28_s32x4, S_s32x4, out32_Q14_s32x4;
int32x2x2_t t0_s32x2x2, t1_s32x2x2, t2_s32x2x2, S_s32x2x2;
#ifdef OPUS_CHECK_ASM
opus_int32 S_c[ 4 ];
VARDECL( opus_int16, out_c );
SAVE_STACK;
ALLOC( out_c, 2 * len, opus_int16 );
silk_memcpy( &S_c, S, sizeof( S_c ) );
silk_biquad_alt_stride2_c( in, B_Q28, A_Q28, S_c, out_c, len );
#endif
/* Negate A_Q28 values and split in two parts */
A_Q28_s32x2 = vld1_s32( A_Q28 );
A_Q28_s32x2 = vneg_s32( A_Q28_s32x2 );
A_L_s32x2 = vshl_n_s32( A_Q28_s32x2, 18 ); /* ( -A_Q28[] & 0x00003FFF ) << 18 */
A_L_s32x2 = vreinterpret_s32_u32( vshr_n_u32( vreinterpret_u32_s32( A_L_s32x2 ), 3 ) ); /* ( -A_Q28[] & 0x00003FFF ) << 15 */
A_U_s32x2 = vshr_n_s32( A_Q28_s32x2, 14 ); /* silk_RSHIFT( -A_Q28[], 14 ) */
A_U_s32x2 = vshl_n_s32( A_U_s32x2, 16 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 16 (Clip two leading bits to conform to C function.) */
A_U_s32x2 = vshr_n_s32( A_U_s32x2, 1 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 15 */
B_Q28_s32x2 = vld1_s32( B_Q28 );
t_s32x2 = vld1_s32( B_Q28 + 1 );
t0_s32x2x2 = vzip_s32( A_L_s32x2, A_L_s32x2 );
t1_s32x2x2 = vzip_s32( A_U_s32x2, A_U_s32x2 );
t2_s32x2x2 = vzip_s32( t_s32x2, t_s32x2 );
A_L_s32x4 = vcombine_s32( t0_s32x2x2.val[ 0 ], t0_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_L_Q28 */
A_U_s32x4 = vcombine_s32( t1_s32x2x2.val[ 0 ], t1_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_U_Q28 */
B_Q28_s32x4 = vcombine_s32( t2_s32x2x2.val[ 0 ], t2_s32x2x2.val[ 1 ] ); /* B_Q28[ {1,1,2,2} ] */
S_s32x4 = vld1q_s32( S ); /* S0 = S[ 0 ]; S3 = S[ 3 ]; */
S_s32x2x2 = vtrn_s32( vget_low_s32( S_s32x4 ), vget_high_s32( S_s32x4 ) ); /* S2 = S[ 1 ]; S1 = S[ 2 ]; */
S_s32x4 = vcombine_s32( S_s32x2x2.val[ 0 ], S_s32x2x2.val[ 1 ] );
for( ; k < len - 1; k += 2 ) {
int32x4_t in_s32x4[ 2 ], t_s32x4;
int32x2_t out32_Q14_s32x2[ 2 ];
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ], S[ 2 * i + 2 ], S[ 2 * i + 3 ]: Q12 */
in_s16x4 = vld1_s16( &in[ 2 * k ] ); /* in{0,1,2,3} = in[ 2 * k + {0,1,2,3} ]; */
in_s32x4[ 0 ] = vshll_n_s16( in_s16x4, 15 ); /* in{0,1,2,3} << 15 */
t_s32x4 = vqdmulhq_lane_s32( in_s32x4[ 0 ], B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1,2,3} ) */
in_s32x4[ 1 ] = vcombine_s32( vget_high_s32( in_s32x4[ 0 ] ), vget_high_s32( in_s32x4[ 0 ] ) ); /* in{2,3,2,3} << 15 */
in_s32x4[ 0 ] = vcombine_s32( vget_low_s32 ( in_s32x4[ 0 ] ), vget_low_s32 ( in_s32x4[ 0 ] ) ); /* in{0,1,0,1} << 15 */
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_low_s32 ( t_s32x4 ), in_s32x4[ 0 ], &S_s32x4, &out32_Q14_s32x2[ 0 ] );
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_high_s32( t_s32x4 ), in_s32x4[ 1 ], &S_s32x4, &out32_Q14_s32x2[ 1 ] );
/* Scale back to Q0 and saturate */
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2[ 0 ], out32_Q14_s32x2[ 1 ] ); /* out32_Q14_{0,1,2,3} */
out32_Q14_s32x4 = vaddq_s32( out32_Q14_s32x4, offset_s32x4 ); /* out32_Q14_{0,1,2,3} + (1<<14) - 1 */
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ) */
vst1_s16( &out[ 2 * k ], out_s16x4 ); /* out[ 2 * k + {0,1,2,3} ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ); */
}
/* Process leftover. */
if( k < len ) {
int32x4_t in_s32x4;
int32x2_t out32_Q14_s32x2;
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ]: Q12 */
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 0 ], in_s16x4, 0 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 1 ], in_s16x4, 1 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
in_s32x4 = vshll_n_s16( in_s16x4, 15 ); /* in{0,1} << 15 */
t_s32x2 = vqdmulh_lane_s32( vget_low_s32( in_s32x4 ), B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1} ) */
in_s32x4 = vcombine_s32( vget_low_s32( in_s32x4 ), vget_low_s32( in_s32x4 ) ); /* in{0,1,0,1} << 15 */
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, t_s32x2, in_s32x4, &S_s32x4, &out32_Q14_s32x2 );
/* Scale back to Q0 and saturate */
out32_Q14_s32x2 = vadd_s32( out32_Q14_s32x2, offset_s32x2 ); /* out32_Q14_{0,1} + (1<<14) - 1 */
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2, out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} + (1<<14) - 1 */
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,0,1} + (1<<14) - 1, 14 ) ) */
vst1_lane_s16( &out[ 2 * k + 0 ], out_s16x4, 0 ); /* out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_0 + (1<<14) - 1, 14 ) ); */
vst1_lane_s16( &out[ 2 * k + 1 ], out_s16x4, 1 ); /* out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_1 + (1<<14) - 1, 14 ) ); */
}
vst1q_lane_s32( &S[ 0 ], S_s32x4, 0 ); /* S[ 0 ] = S0; */
vst1q_lane_s32( &S[ 1 ], S_s32x4, 2 ); /* S[ 1 ] = S2; */
vst1q_lane_s32( &S[ 2 ], S_s32x4, 1 ); /* S[ 2 ] = S1; */
vst1q_lane_s32( &S[ 3 ], S_s32x4, 3 ); /* S[ 3 ] = S3; */
#ifdef OPUS_CHECK_ASM
silk_assert( !memcmp( S_c, S, sizeof( S_c ) ) );
silk_assert( !memcmp( out_c, out, 2 * len * sizeof( opus_int16 ) ) );
RESTORE_STACK;
#endif
}
+10 -3
View File
@@ -28,6 +28,11 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef SILK_MACROS_ARMv4_H
#define SILK_MACROS_ARMv4_H
/* This macro only avoids the undefined behaviour from a left shift of
a negative value. It should only be used in macros that can't include
SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
#undef silk_SMULWB
static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
@@ -38,7 +43,7 @@ static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
"#silk_SMULWB\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b<<16)
: "%r"(a), "r"(SAFE_SHL(b,16))
);
return rd_hi;
}
@@ -80,7 +85,7 @@ static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b)
);
return (rd_hi<<16)+(rd_lo>>16);
return SAFE_SHL(rd_hi,16)+(rd_lo>>16);
}
#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
@@ -96,8 +101,10 @@ static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(c)
);
return a+(rd_hi<<16)+(rd_lo>>16);
return a+SAFE_SHL(rd_hi,16)+(rd_lo>>16);
}
#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
#undef SAFE_SHL
#endif /* SILK_MACROS_ARMv4_H */

Some files were not shown because too many files have changed in this diff Show More