update libaom to rev 0ec86ac7ae1e32a7e70410fa4972a655ec3670a4 (without moz.build and aom_ports/aom_once.h)

This commit is contained in:
2019-02-01 21:11:22 +08:00
parent 3abe807b64
commit b246b0a6ee
92 changed files with 3631 additions and 3728 deletions
+1 -1
View File
@@ -10,4 +10,4 @@ The upstream aom git repository is:
https://aomedia.googlesource.com/aom
The git commit ID used was b46542180d551d5e4eb666cf35dd62395ba43f3e.
The git commit ID used was 0ec86ac7ae1e32a7e70410fa4972a655ec3670a4.
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@
.equ CONFIG_BIG_ENDIAN, 0
.equ CONFIG_BITSTREAM_DEBUG, 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING, 0
.equ CONFIG_COLLECT_INTER_MODE_RD_STATS, 0
.equ CONFIG_COLLECT_PARTITION_STATS, 0
.equ CONFIG_COLLECT_RD_STATS, 0
.equ CONFIG_DEBUG, 0
.equ CONFIG_DENOISE, 1
@@ -30,7 +30,6 @@
.equ CONFIG_DIST_8X8, 0
.equ CONFIG_ENTROPY_STATS, 0
.equ CONFIG_FILEOPTIONS, 1
.equ CONFIG_FIX_GF_LENGTH, 1
.equ CONFIG_FP_MB_STATS, 0
.equ CONFIG_GCC, 1
.equ CONFIG_GCOV, 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
CONFIG_BIG_ENDIAN equ 0
CONFIG_BITSTREAM_DEBUG equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
CONFIG_COLLECT_PARTITION_STATS equ 0
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 1
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_DIST_8X8 equ 0
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1
CONFIG_FIX_GF_LENGTH equ 1
CONFIG_FP_MB_STATS equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
@@ -24,7 +24,7 @@
#define CONFIG_BIG_ENDIAN 0
#define CONFIG_BITSTREAM_DEBUG 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
#define CONFIG_COLLECT_PARTITION_STATS 0
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 1
@@ -32,7 +32,6 @@
#define CONFIG_DIST_8X8 0
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1
#define CONFIG_FIX_GF_LENGTH 1
#define CONFIG_FP_MB_STATS 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
+25
View File
@@ -973,9 +973,22 @@ enum aome_enc_control_id {
/*!\brief Control to use a reduced tx type set */
AV1E_SET_REDUCED_TX_TYPE_SET,
/*!\brief Control to use dct only for intra modes */
AV1E_SET_INTRA_DCT_ONLY,
/*!\brief Control to use dct only for inter modes */
AV1E_SET_INTER_DCT_ONLY,
/*!\brief Control to use adaptive quantize_b */
AV1E_SET_QUANT_B_ADAPT,
/*!\brief Control to select maximum height for the GF group pyramid structure
* (valid values: 1 - 4) */
AV1E_SET_GF_MAX_PYRAMID_HEIGHT,
/*!\brief Control to select maximum reference frames allowed per frame
* (valid values: 3 - 7) */
AV1E_SET_MAX_REFERENCE_FRAMES,
};
/*!\brief aom 1-D scaling mode
@@ -1350,9 +1363,21 @@ AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SUBSAMPLING_Y, unsigned int)
AOM_CTRL_USE_TYPE(AV1E_SET_REDUCED_TX_TYPE_SET, unsigned int)
#define AOM_CTRL_AV1E_SET_REDUCED_TX_TYPE_SET
AOM_CTRL_USE_TYPE(AV1E_SET_INTRA_DCT_ONLY, unsigned int)
#define AOM_CTRL_AV1E_SET_INTRA_DCT_ONLY
AOM_CTRL_USE_TYPE(AV1E_SET_INTER_DCT_ONLY, unsigned int)
#define AOM_CTRL_AV1E_SET_INTER_DCT_ONLY
AOM_CTRL_USE_TYPE(AV1E_SET_QUANT_B_ADAPT, unsigned int)
#define AOM_CTRL_AV1E_SET_QUANT_B_ADAPT
AOM_CTRL_USE_TYPE(AV1E_SET_GF_MAX_PYRAMID_HEIGHT, unsigned int)
#define AOM_CTRL_AV1E_SET_GF_MAX_PYRAMID_HEIGHT
AOM_CTRL_USE_TYPE(AV1E_SET_MAX_REFERENCE_FRAMES, unsigned int)
#define AOM_CTRL_AV1E_SET_MAX_REFERENCE_FRAMES
/*!\endcond */
/*! @} - end defgroup aom_encoder */
#ifdef __cplusplus
+1
View File
@@ -226,6 +226,7 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.h"
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/quantize_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/variance_impl_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")
+1
View File
@@ -522,6 +522,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_quantize_b_64x64 ssse3/;
} # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+194
View File
@@ -12,6 +12,68 @@
#include "aom_dsp/quantize.h"
#include "aom_mem/aom_mem.h"
/* Adaptive dead-zone quantizer (low bit-depth path).
 *
 * Clears qcoeff_ptr/dqcoeff_ptr, then walks the scan order backwards to drop
 * the trailing run of coefficients that lie strictly inside a widened dead
 * zone, and finally quantizes/dequantizes the remaining leading positions.
 *
 * Index 0 of zbin_ptr/round_ptr/quant_ptr/quant_shift_ptr/dequant_ptr is used
 * for raster position 0 and index 1 for every other position.
 * qm_ptr/iqm_ptr may be NULL, in which case a flat weight of
 * (1 << AOM_QM_BITS) is used. iscan is unused. *eob_ptr receives the scan
 * index of the last non-zero quantized coefficient plus one (0 if none).
 */
void quantize_b_adaptive_helper_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
    const qm_val_t *iqm_ptr, const int log_scale) {
  // Dead-zone thresholds scaled down by log_scale, and their negations.
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
  int quant_count = (int)n_coeffs;  // leading scan positions to quantize
  int last_nonzero = -1;
  int pos;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass: trim trailing coefficients that fall strictly inside the
  // dead zone widened by prescan_add; stop at the first one that does not.
  while (quant_count > 0) {
    const int rc = scan[quant_count - 1];
    const int is_ac = rc != 0;
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const int coeff = coeff_ptr[rc] * wt;
    const int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[is_ac] * 325, 7);
    const int upper = zbins[is_ac] * (1 << AOM_QM_BITS) + prescan_add;
    const int lower = nzbins[is_ac] * (1 << AOM_QM_BITS) - prescan_add;
    if (coeff >= upper || coeff <= lower) break;
    --quant_count;
  }

  // Quantization pass: every scan position >= quant_count stays zero.
  // Note: quant_count can be zero.
  for (pos = 0; pos < quant_count; pos++) {
    const int rc = scan[pos];
    const int is_ac = rc != 0;
    const int coeff = coeff_ptr[rc];
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);

    // Below the (un-widened) dead zone: leave the zeroed outputs alone.
    if (abs_coeff * wt < (zbins[is_ac] << AOM_QM_BITS)) continue;

    int64_t tmp =
        clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[is_ac], log_scale),
              INT16_MIN, INT16_MAX);
    tmp *= wt;
    const int abs_qcoeff =
        (int)(((((tmp * quant_ptr[is_ac]) >> 16) + tmp) *
               quant_shift_ptr[is_ac]) >>
              (16 - log_scale + AOM_QM_BITS));  // quantization
    qcoeff_ptr[rc] = (abs_qcoeff ^ coeff_sign) - coeff_sign;

    // Dequantize with the (optionally weighted) step size.
    const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
    const int dequant =
        (dequant_ptr[is_ac] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);

    if (abs_qcoeff) last_nonzero = pos;
  }
  *eob_ptr = last_nonzero + 1;
}
void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -74,6 +136,64 @@ void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
/* Adaptive dead-zone quantizer (high bit-depth path).
 *
 * Clears qcoeff_ptr/dqcoeff_ptr, collects the scan indices of every
 * coefficient that lies outside the dead zone widened by prescan_add, and
 * quantizes/dequantizes only those positions.
 *
 * Index 0 of zbin_ptr/round_ptr/quant_ptr/quant_shift_ptr/dequant_ptr is used
 * for raster position 0 and index 1 for every other position.
 * qm_ptr/iqm_ptr may be NULL, in which case a flat weight of
 * (1 << AOM_QM_BITS) is used. iscan is unused. *eob_ptr receives the scan
 * index of the last non-zero quantized coefficient plus one (0 if none).
 *
 * The index buffer holds 4096 entries, so n_coeffs must not exceed 4096.
 */
void highbd_quantize_b_adaptive_helper_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
    const qm_val_t *iqm_ptr, const int log_scale) {
  int i, eob = -1;
  // Dead-zone thresholds scaled down by log_scale, and their negations.
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
  int dequant;
  int idx_arr[4096];  // scan indices selected by the pre-scan pass
  (void)iscan;
  int idx = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const int coeff = coeff_ptr[rc] * wt;

    // If the coefficient is out of the base ZBIN range, keep it for
    // quantization. The two bounds have opposite signs, so they must be
    // combined with ||: a value can exceed the upper bound or fall below the
    // lower one, never both.
    int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[rc != 0] * 325, 7);
    if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS) + prescan_add) ||
        coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS) - prescan_add))
      idx_arr[idx++] = i;
  }

  // Quantization pass: only process the coefficients selected in
  // pre-scan pass. Note: idx can be zero.
  for (i = 0; i < idx; i++) {
    const int rc = scan[idx_arr[i]];
    const int coeff = coeff_ptr[rc];
    const int coeff_sign = (coeff >> 31);
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    // 64-bit intermediates: no clamping to 16 bits on the high bit-depth path.
    const int64_t tmp1 =
        abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
    const int64_t tmpw = tmp1 * wt;
    const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
    const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
                                 (16 - log_scale + AOM_QM_BITS));
    qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
    // Dequantize with the (optionally weighted) step size.
    dequant =
        (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
    if (abs_qcoeff) eob = idx_arr[i];
  }
  *eob_ptr = eob + 1;
}
void highbd_quantize_b_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -133,6 +253,80 @@ void highbd_quantize_b_helper_c(
/* These functions should only be called when quantisation matrices
are not used. */
/* Adaptive quantizer entry point using log_scale 0 and no quantisation
 * matrices; all work is delegated to quantize_b_adaptive_helper_c(). */
void aom_quantize_b_adaptive_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               const int16_t *zbin_ptr,
                               const int16_t *round_ptr,
                               const int16_t *quant_ptr,
                               const int16_t *quant_shift_ptr,
                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                               const int16_t *dequant_ptr, uint16_t *eob_ptr,
                               const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 0;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               no_matrix, no_matrix, log_scale);
}
/* Adaptive quantizer entry point using log_scale 1 and no quantisation
 * matrices; all work is delegated to quantize_b_adaptive_helper_c(). */
void aom_quantize_b_32x32_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 1;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               no_matrix, no_matrix, log_scale);
}
/* Adaptive quantizer entry point using log_scale 2 and no quantisation
 * matrices; all work is delegated to quantize_b_adaptive_helper_c(). */
void aom_quantize_b_64x64_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 2;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               no_matrix, no_matrix, log_scale);
}
/* High bit-depth adaptive quantizer entry point using log_scale 0 and no
 * quantisation matrices; all work is delegated to
 * highbd_quantize_b_adaptive_helper_c(). */
void aom_highbd_quantize_b_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 0;
  highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                                      quant_ptr, quant_shift_ptr, qcoeff_ptr,
                                      dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
                                      iscan, no_matrix, no_matrix, log_scale);
}
/* High bit-depth adaptive quantizer entry point using log_scale 1 and no
 * quantisation matrices; all work is delegated to
 * highbd_quantize_b_adaptive_helper_c(). */
void aom_highbd_quantize_b_32x32_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 1;
  highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                                      quant_ptr, quant_shift_ptr, qcoeff_ptr,
                                      dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
                                      iscan, no_matrix, no_matrix, log_scale);
}
/* High bit-depth adaptive quantizer entry point using log_scale 2 and no
 * quantisation matrices; all work is delegated to
 * highbd_quantize_b_adaptive_helper_c(). */
void aom_highbd_quantize_b_64x64_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const qm_val_t *const no_matrix = NULL;  // flat weighting in the helper
  const int log_scale = 2;
  highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                                      quant_ptr, quant_shift_ptr, qcoeff_ptr,
                                      dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
                                      iscan, no_matrix, no_matrix, log_scale);
}
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+60
View File
@@ -20,6 +20,66 @@
extern "C" {
#endif
// Adaptive quantizer helper (low bit-depth). Takes optional quantisation
// matrices (qm_ptr / iqm_ptr, may be NULL) and a log_scale that selects the
// down-scaling applied to zbin/round and to the dequantized output.
// Writes qcoeff_ptr, dqcoeff_ptr and *eob_ptr.
void quantize_b_adaptive_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, const int log_scale);
// Adaptive quantizer without quantisation matrices, log_scale 0.
void aom_quantize_b_adaptive_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
// Adaptive quantizer without quantisation matrices, log_scale 1.
void aom_quantize_b_32x32_adaptive_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
// Adaptive quantizer without quantisation matrices, log_scale 2.
void aom_quantize_b_64x64_adaptive_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
// Adaptive quantizer helper (high bit-depth). Same parameter list as
// quantize_b_adaptive_helper_c.
void highbd_quantize_b_adaptive_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, const int log_scale);
// High bit-depth adaptive quantizer without quantisation matrices,
// log_scale 0.
void aom_highbd_quantize_b_adaptive_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
// High bit-depth adaptive quantizer without quantisation matrices,
// log_scale 1.
void aom_highbd_quantize_b_32x32_adaptive_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
// High bit-depth adaptive quantizer without quantisation matrices,
// log_scale 2.
void aom_highbd_quantize_b_64x64_adaptive_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan);
void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
-22
View File
@@ -18,28 +18,6 @@
#include "aom/aom_integer.h"
#include "aom_dsp/x86/quantize_x86.h"
// Gathers eight consecutive 32-bit coefficients into one vector of eight
// 16-bit lanes. Each value is narrowed with a plain int16_t cast, so only the
// low 16 bits are kept.
static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
  int16_t lane[8];
  assert(sizeof(tran_low_t) == 4);
  for (int k = 0; k < 8; ++k) lane[k] = (int16_t)coeff_ptr[k];
  return _mm_setr_epi16(lane[0], lane[1], lane[2], lane[3], lane[4], lane[5],
                        lane[6], lane[7]);
}
// Sign-extends eight 16-bit lanes to 32 bits and writes them to coeff_ptr
// with two aligned 128-bit stores (coeff_ptr[0..3] and coeff_ptr[4..7]).
static INLINE void store_coefficients(__m128i coeff_vals,
                                      tran_low_t *coeff_ptr) {
  assert(sizeof(tran_low_t) == 4);
  const __m128i unit = _mm_set1_epi16(1);
  // A signed multiply by 1 yields the value itself in the low half of the
  // product and its sign extension in the high half.
  const __m128i low_half = _mm_mullo_epi16(coeff_vals, unit);
  const __m128i high_half = _mm_mulhi_epi16(coeff_vals, unit);
  __m128i *const dst = (__m128i *)(coeff_ptr);
  _mm_store_si128(dst, _mm_unpacklo_epi16(low_half, high_half));
  _mm_store_si128(dst + 1, _mm_unpackhi_epi16(low_half, high_half));
}
void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
+192
View File
@@ -0,0 +1,192 @@
/*
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <tmmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include "config/aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/x86/quantize_x86.h"
// Quantizes eight 16-bit absolute coefficients in place:
//   t      = saturating_add(*coeff, round)
//   t      = ((t * quant) >> 16) + t
//   *coeff = low 16 bits of ((t * *shift) >> 14)
static INLINE void calculate_qcoeff_64x64(__m128i *coeff, const __m128i round,
                                          const __m128i quant,
                                          const __m128i *shift) {
  const __m128i rounded = _mm_adds_epi16(*coeff, round);
  const __m128i scaled =
      _mm_add_epi16(_mm_mulhi_epi16(rounded, quant), rounded);
  // Stitch bits [14, 30) of the 32-bit product back into 16-bit lanes: the
  // top two bits of the low half join the low fourteen bits of the high half.
  const __m128i from_low = _mm_srli_epi16(_mm_mullo_epi16(scaled, *shift), 14);
  const __m128i from_high = _mm_slli_epi16(_mm_mulhi_epi16(scaled, *shift), 2);
  *coeff = _mm_or_si128(from_low, from_high);
}
// Dequantizes eight 16-bit quantized coefficients and stores them as 32-bit
// values: dqcoeff[i] = sign(qcoeff[i]) * ((|qcoeff[i]| * dequant[i]) >> 2).
// Uses aligned 128-bit stores to dqcoeff[0..3] and dqcoeff[4..7].
static INLINE void calculate_dqcoeff_and_store_64x64(const __m128i qcoeff,
                                                     const __m128i dequant,
                                                     const __m128i zero,
                                                     tran_low_t *dqcoeff) {
  // Work on magnitudes so the shift below rounds like the C code.
  const __m128i magnitude = _mm_abs_epi16(qcoeff);

  // Full 32-bit products, assembled from the low and high 16-bit halves.
  const __m128i prod_lo = _mm_mullo_epi16(magnitude, dequant);
  const __m128i prod_hi = _mm_mulhi_epi16(magnitude, dequant);
  // "Divide" by 4.
  const __m128i scaled_0 = _mm_srli_epi32(_mm_unpacklo_epi16(prod_lo, prod_hi), 2);
  const __m128i scaled_1 = _mm_srli_epi32(_mm_unpackhi_epi16(prod_lo, prod_hi), 2);

  // Placing qcoeff in the upper half of each 32-bit lane gives a value with
  // the same sign (and the same zero-ness) as the 16-bit coefficient.
  const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
  const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);

  __m128i *const dst = (__m128i *)(dqcoeff);
  _mm_store_si128(dst, _mm_sign_epi32(scaled_0, sign_0));
  _mm_store_si128(dst + 1, _mm_sign_epi32(scaled_1, sign_1));
}
// SSSE3 quantizer for the 64x64 case (zbin and round are divided by 4 with
// rounding, dequantized values are shifted right by 2).
//
// Processes a fixed 1024 coefficients in groups of 16; n_coeffs and scan are
// ignored. Writes qcoeff_ptr and dqcoeff_ptr for all 1024 positions and
// stores the end-of-block value in *eob_ptr.
//
// All parameter tables are read with aligned 128-bit loads and the outputs
// are written with aligned 128-bit stores. The first 8 coefficients use the
// low 64 bits of each table vector; afterwards the high 64 bits are
// broadcast and used for everything else (presumably lane 0 carries the DC
// value and the other lanes the AC value -- confirm against the caller).
void aom_quantize_b_64x64_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi16(1);
const __m128i two = _mm_set1_epi16(2);
int index;
__m128i zbin, round, quant, dequant, shift;
__m128i coeff0, coeff1, qcoeff0, qcoeff1;
__m128i cmp_mask0, cmp_mask1, all_zero;
__m128i eob = zero, eob0;
(void)scan;
(void)n_coeffs;
// Setup global values.
zbin = _mm_load_si128((const __m128i *)zbin_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
shift = _mm_load_si128((const __m128i *)quant_shift_ptr);
// Shift with rounding: (x + 2) >> 2 for both zbin and round.
zbin = _mm_add_epi16(zbin, two);
round = _mm_add_epi16(round, two);
zbin = _mm_srli_epi16(zbin, 2);
round = _mm_srli_epi16(round, 2);
// Subtract one so the strict "greater than" compares below act as
// "greater than or equal to zbin".
zbin = _mm_sub_epi16(zbin, one);
// Do DC and first 15 AC.
coeff0 = load_coefficients(coeff_ptr);
coeff1 = load_coefficients(coeff_ptr + 8);
qcoeff0 = _mm_abs_epi16(coeff0);
qcoeff1 = _mm_abs_epi16(coeff1);
cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
// From here on only the high 64 bits of zbin are used, in every lane.
zbin = _mm_unpackhi_epi64(zbin, zbin);
cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
all_zero = _mm_or_si128(cmp_mask0, cmp_mask1);
if (_mm_movemask_epi8(all_zero) == 0) {
// No coefficient in this group passes the zbin test: write zeros and
// still switch the remaining tables to their high halves for the loop.
_mm_store_si128((__m128i *)(qcoeff_ptr), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + 4), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + 8), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + 12), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + 4), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + 8), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + 12), zero);
round = _mm_unpackhi_epi64(round, round);
quant = _mm_unpackhi_epi64(quant, quant);
shift = _mm_unpackhi_epi64(shift, shift);
dequant = _mm_unpackhi_epi64(dequant, dequant);
} else {
// First 8 coefficients use the tables as loaded; the tables are switched
// to their high halves before the second 8 are processed.
calculate_qcoeff_64x64(&qcoeff0, round, quant, &shift);
round = _mm_unpackhi_epi64(round, round);
quant = _mm_unpackhi_epi64(quant, quant);
shift = _mm_unpackhi_epi64(shift, shift);
calculate_qcoeff_64x64(&qcoeff1, round, quant, &shift);
// Reinsert signs.
qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0);
qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1);
// Mask out zbin threshold coeffs.
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
store_coefficients(qcoeff0, qcoeff_ptr);
store_coefficients(qcoeff1, qcoeff_ptr + 8);
calculate_dqcoeff_and_store_64x64(qcoeff0, dequant, zero, dqcoeff_ptr);
dequant = _mm_unpackhi_epi64(dequant, dequant);
calculate_dqcoeff_and_store_64x64(qcoeff1, dequant, zero, dqcoeff_ptr + 8);
eob =
scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero);
}
// AC only loop. The bound is the constant 1024, not n_coeffs.
for (index = 16; index < 1024; index += 16) {
coeff0 = load_coefficients(coeff_ptr + index);
coeff1 = load_coefficients(coeff_ptr + index + 8);
qcoeff0 = _mm_abs_epi16(coeff0);
qcoeff1 = _mm_abs_epi16(coeff1);
cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
all_zero = _mm_or_si128(cmp_mask0, cmp_mask1);
if (_mm_movemask_epi8(all_zero) == 0) {
// Whole group is below the threshold: emit zeros and skip the math.
_mm_store_si128((__m128i *)(qcoeff_ptr + index), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 4), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 8), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 12), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + index), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 4), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 8), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 12), zero);
continue;
}
calculate_qcoeff_64x64(&qcoeff0, round, quant, &shift);
calculate_qcoeff_64x64(&qcoeff1, round, quant, &shift);
// Restore the signs, then zero the lanes that failed the zbin test.
qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0);
qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1);
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
store_coefficients(qcoeff0, qcoeff_ptr + index);
store_coefficients(qcoeff1, qcoeff_ptr + index + 8);
calculate_dqcoeff_and_store_64x64(qcoeff0, dequant, zero,
dqcoeff_ptr + index);
calculate_dqcoeff_and_store_64x64(qcoeff1, dequant, zero,
dqcoeff_ptr + 8 + index);
// Keep the lane-wise maximum of the per-group end-of-block candidates.
eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index,
zero);
eob = _mm_max_epi16(eob, eob0);
}
// Reduce the per-lane candidates to the single value reported to the caller.
*eob_ptr = accumulate_eob(eob);
}
+20
View File
@@ -75,3 +75,23 @@ static INLINE int16_t accumulate_eob(__m128i eob) {
eob = _mm_max_epi16(eob, eob_shuffled);
return _mm_extract_epi16(eob, 1);
}
// Loads eight consecutive 32-bit coefficients with two aligned 128-bit loads
// and packs them into eight 16-bit lanes using signed saturation.
static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
  assert(sizeof(tran_low_t) == 4);
  const __m128i first_four = _mm_load_si128((__m128i *)(coeff_ptr));
  const __m128i last_four = _mm_load_si128((__m128i *)(coeff_ptr + 4));
  const __m128i packed = _mm_packs_epi32(first_four, last_four);
  return packed;
}
// Sign-extends eight 16-bit lanes to 32 bits and writes them to coeff_ptr
// with two aligned 128-bit stores (coeff_ptr[0..3] and coeff_ptr[4..7]).
static INLINE void store_coefficients(__m128i coeff_vals,
                                      tran_low_t *coeff_ptr) {
  assert(sizeof(tran_low_t) == 4);
  const __m128i unit = _mm_set1_epi16(1);
  // A signed multiply by 1 yields the value itself in the low half of the
  // product and its sign extension in the high half.
  const __m128i low_half = _mm_mullo_epi16(coeff_vals, unit);
  const __m128i high_half = _mm_mulhi_epi16(coeff_vals, unit);
  __m128i *const dst = (__m128i *)(coeff_ptr);
  _mm_store_si128(dst, _mm_unpacklo_epi16(low_half, high_half));
  _mm_store_si128(dst + 1, _mm_unpackhi_epi16(low_half, high_half));
}
-82
View File
@@ -167,35 +167,7 @@ static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
out[6 + 8] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x31);
out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
}
// Transposes the 16-bit elements of eight 256-bit rows in[0..7] into
// out[0..7]. The AVX2 unpack instructions used here work on each 128-bit lane
// independently, so the low lanes and the high lanes of the inputs are
// transposed as two separate 8x8 blocks: lane k of out[j] holds element j of
// lane k of in[0], in[1], ..., in[7], in that order.
static INLINE void transpose_16bit_16x8_avx2(const __m256i *const in,
__m256i *const out) {
// Stage 1: interleave 16-bit elements of adjacent row pairs. a0..a3 hold
// elements 0-3 of each lane, a4..a7 hold elements 4-7.
const __m256i a0 = _mm256_unpacklo_epi16(in[0], in[1]);
const __m256i a1 = _mm256_unpacklo_epi16(in[2], in[3]);
const __m256i a2 = _mm256_unpacklo_epi16(in[4], in[5]);
const __m256i a3 = _mm256_unpacklo_epi16(in[6], in[7]);
const __m256i a4 = _mm256_unpackhi_epi16(in[0], in[1]);
const __m256i a5 = _mm256_unpackhi_epi16(in[2], in[3]);
const __m256i a6 = _mm256_unpackhi_epi16(in[4], in[5]);
const __m256i a7 = _mm256_unpackhi_epi16(in[6], in[7]);
// Stage 2: interleave 32-bit pairs so each register holds two columns of
// four rows: b0/b1 = columns 0-1 (rows 0-3 / 4-7), b4/b5 = columns 2-3,
// b2/b3 = columns 4-5, b6/b7 = columns 6-7.
const __m256i b0 = _mm256_unpacklo_epi32(a0, a1);
const __m256i b1 = _mm256_unpacklo_epi32(a2, a3);
const __m256i b2 = _mm256_unpacklo_epi32(a4, a5);
const __m256i b3 = _mm256_unpacklo_epi32(a6, a7);
const __m256i b4 = _mm256_unpackhi_epi32(a0, a1);
const __m256i b5 = _mm256_unpackhi_epi32(a2, a3);
const __m256i b6 = _mm256_unpackhi_epi32(a4, a5);
const __m256i b7 = _mm256_unpackhi_epi32(a6, a7);
// Stage 3: join the rows 0-3 and rows 4-7 halves of each column with 64-bit
// interleaves; out[j] is column j of the per-lane 8x8 block.
out[0] = _mm256_unpacklo_epi64(b0, b1);
out[1] = _mm256_unpackhi_epi64(b0, b1);
out[2] = _mm256_unpacklo_epi64(b4, b5);
out[3] = _mm256_unpackhi_epi64(b4, b5);
out[4] = _mm256_unpacklo_epi64(b2, b3);
out[5] = _mm256_unpackhi_epi64(b2, b3);
out[6] = _mm256_unpacklo_epi64(b6, b7);
out[7] = _mm256_unpackhi_epi64(b6, b7);
}
static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
for (int i = 0; i < size; ++i) {
out[size - i - 1] = in[i];
@@ -263,61 +235,7 @@ static INLINE void av1_round_shift_rect_array_32_avx2(__m256i *input,
}
}
}
// Multiplies adjacent 16-bit pairs of `a` by the pair constant built from
// (scale, 1 << (NewSqrt2Bits - 1)), sums each pair of products into a 32-bit
// lane, and arithmetic-shifts the sums right by NewSqrt2Bits.
// NOTE(review): pair_set_w16_epi16 is defined elsewhere; presumably it
// repeats the (scale, rounding) 16-bit pair across the vector -- confirm.
static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
  const __m256i scale_and_round =
      pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1));
  return _mm256_srai_epi32(_mm256_madd_epi16(a, scale_and_round),
                           NewSqrt2Bits);
}
// Scales the sixteen 16-bit values in `a` by NewSqrt2 (via scale_round_avx2),
// widening them to 32 bits, and stores them in two groups: the eight results
// from the lower 128-bit lane at b[0..7] and the eight results from the upper
// lane at b[64..71]. The stores are aligned variants, so b must satisfy the
// alignment they require.
// NOTE(review): the 64-element offset presumably matches the caller's buffer
// layout -- confirm against the call sites.
static INLINE void store_rect_16bit_to_32bit_w8_avx2(const __m256i a,
                                                     int32_t *const b) {
  // Pair every value with 1 so the multiply-add inside scale_round_avx2 can
  // fold its second pair constant into the sum.
  const __m256i ones = _mm256_set1_epi16(1);
  const __m256i scaled_lo =
      scale_round_avx2(_mm256_unpacklo_epi16(a, ones), NewSqrt2);
  const __m256i scaled_hi =
      scale_round_avx2(_mm256_unpackhi_epi16(a, ones), NewSqrt2);
  // 0x31 picks the upper 128-bit lane of each operand.
  const __m256i upper_lanes =
      _mm256_permute2f128_si256(scaled_lo, scaled_hi, 0x31);
  _mm_store_si128((__m128i *)b, _mm256_castsi256_si128(scaled_lo));
  _mm_store_si128((__m128i *)(b + 4), _mm256_castsi256_si128(scaled_hi));
  _mm256_store_si256((__m256i *)(b + 64), upper_lanes);
}
// Applies store_rect_16bit_to_32bit_w8_avx2 to each of the out_size vectors
// in `in`; vector i is written starting at out + i * stride.
static INLINE void store_rect_buffer_16bit_to_32bit_w8_avx2(
    const __m256i *const in, int32_t *const out, const int stride,
    const int out_size) {
  int row = 0;
  while (row < out_size) {
    store_rect_16bit_to_32bit_w8_avx2(in[row], out + row * stride);
    ++row;
  }
}
// Combines eight pairs of 128-bit vectors into eight 256-bit vectors:
// out[i] carries in1[i] in its lower lane and in2[i] in its upper lane.
static INLINE void pack_reg(const __m128i *in1, const __m128i *in2,
                            __m256i *out) {
  for (int i = 0; i < 8; ++i) {
    const __m256i lower_lane = _mm256_castsi128_si256(in1[i]);
    out[i] = _mm256_insertf128_si256(lower_lane, in2[i], 0x1);
  }
}
// Splits eight 256-bit vectors into sixteen 128-bit vectors: out1[0..7]
// receive the lower lanes of in[0..7] and out1[8..15] receive the upper lanes.
static INLINE void extract_reg(const __m256i *in, __m128i *out1) {
  // All lower lanes are written before any upper lane is read, matching the
  // order of the unrolled form.
  for (int i = 0; i < 8; ++i) {
    out1[i] = _mm256_castsi256_si128(in[i]);
  }
  for (int i = 0; i < 8; ++i) {
    out1[8 + i] = _mm256_extractf128_si256(in[i], 0x01);
  }
}
#ifdef __cplusplus
}
#endif
+1 -1
View File
@@ -26,7 +26,7 @@ extern "C" {
#define AOM_INTERP_EXTEND 4
#define AOM_BORDER_IN_PIXELS 288
#define AOM_ENC_NO_SCALE_BORDER 160
#define AOM_DEC_BORDER_IN_PIXELS 288
#define AOM_DEC_BORDER_IN_PIXELS 64
typedef struct yv12_buffer_config {
union {
+8 -1
View File
@@ -484,6 +484,7 @@ static int main_loop(int argc, const char **argv_) {
input.webm_ctx = &webm_ctx;
#endif
struct ObuDecInputContext obu_ctx = { NULL, NULL, 0, 0, 0 };
int is_ivf = 0;
obu_ctx.avx_ctx = &aom_input_ctx;
input.obu_ctx = &obu_ctx;
@@ -610,8 +611,10 @@ static int main_loop(int argc, const char **argv_) {
#endif
input.aom_input_ctx->filename = fn;
input.aom_input_ctx->file = infile;
if (file_is_ivf(input.aom_input_ctx))
if (file_is_ivf(input.aom_input_ctx)) {
input.aom_input_ctx->file_type = FILE_TYPE_IVF;
is_ivf = 1;
}
#if CONFIG_WEBM_IO
else if (file_is_webm(input.webm_ctx, input.aom_input_ctx))
input.aom_input_ctx->file_type = FILE_TYPE_WEBM;
@@ -661,6 +664,10 @@ static int main_loop(int argc, const char **argv_) {
}
fourcc_interface = get_aom_decoder_by_fourcc(aom_input_ctx.fourcc);
if (is_ivf && !fourcc_interface)
fatal("Unsupported fourcc: %x\n", aom_input_ctx.fourcc);
if (interface && fourcc_interface && interface != fourcc_interface)
warn("Header indicates codec: %s\n", fourcc_interface->name);
else
+27 -5
View File
@@ -263,9 +263,9 @@ static const arg_def_t global_error_resilient =
"Enable global error resiliency features");
static const arg_def_t lag_in_frames =
ARG_DEF(NULL, "lag-in-frames", 1, "Max number of frames to lag");
static const arg_def_t large_scale_tile =
ARG_DEF(NULL, "large-scale-tile", 1,
"Large scale tile coding (0: off (default), 1: on)");
static const arg_def_t large_scale_tile = ARG_DEF(
NULL, "large-scale-tile", 1,
"Large scale tile coding (0: off (default), 1: on (ivf output only))");
static const arg_def_t monochrome =
ARG_DEF(NULL, "monochrome", 0, "Monochrome video (no chroma planes)");
static const arg_def_t full_still_picture_hdr = ARG_DEF(
@@ -532,6 +532,12 @@ static const arg_def_t qm_max = ARG_DEF(
NULL, "qm-max", 1, "Max quant matrix flatness (0..15), default is 15");
static const arg_def_t reduced_tx_type_set = ARG_DEF(
NULL, "reduced-tx-type-set", 1, "Use reduced set of transform types");
static const arg_def_t use_intra_dct_only =
ARG_DEF(NULL, "use-intra-dct-only", 1, "Use DCT only for INTRA modes");
static const arg_def_t use_inter_dct_only =
ARG_DEF(NULL, "use-inter-dct-only", 1, "Use DCT only for INTER modes");
static const arg_def_t quant_b_adapt =
ARG_DEF(NULL, "quant-b-adapt", 1, "Use adaptive quantize_b");
#if CONFIG_DIST_8X8
static const arg_def_t enable_dist_8x8 =
ARG_DEF(NULL, "enable-dist-8x8", 1,
@@ -602,6 +608,9 @@ static const arg_def_t max_gf_interval = ARG_DEF(
static const arg_def_t gf_max_pyr_height =
ARG_DEF(NULL, "gf-max-pyr-height", 1,
"maximum height for GF group pyramid structure (1 to 4 (default))");
static const arg_def_t max_reference_frames = ARG_DEF(
NULL, "max-reference-frames", 1,
"maximum number of reference frames allowed per frame (3 to 7 (default))");
static const struct arg_enum_list color_primaries_enum[] = {
{ "bt709", AOM_CICP_CP_BT_709 },
@@ -752,6 +761,9 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
&qm_min,
&qm_max,
&reduced_tx_type_set,
&use_intra_dct_only,
&use_inter_dct_only,
&quant_b_adapt,
#if CONFIG_DIST_8X8
&enable_dist_8x8,
#endif
@@ -779,7 +791,8 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
#if CONFIG_DENOISE
&denoise_noise_level,
&denoise_block_size,
#endif
#endif // CONFIG_DENOISE
&max_reference_frames,
&enable_ref_frame_mvs,
&bitdeptharg,
&inbitdeptharg,
@@ -834,6 +847,9 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AV1E_SET_QM_MIN,
AV1E_SET_QM_MAX,
AV1E_SET_REDUCED_TX_TYPE_SET,
AV1E_SET_INTRA_DCT_ONLY,
AV1E_SET_INTER_DCT_ONLY,
AV1E_SET_QUANT_B_ADAPT,
#if CONFIG_DIST_8X8
AV1E_SET_ENABLE_DIST_8X8,
#endif
@@ -861,7 +877,8 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
#if CONFIG_DENOISE
AV1E_SET_DENOISE_NOISE_LEVEL,
AV1E_SET_DENOISE_BLOCK_SIZE,
#endif
#endif // CONFIG_DENOISE
AV1E_SET_MAX_REFERENCE_FRAMES,
AV1E_SET_ENABLE_REF_FRAME_MVS,
0 };
#endif // CONFIG_AV1_ENCODER
@@ -1340,6 +1357,7 @@ static int parse_stream_params(struct AvxEncoderConfig *global,
config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
} else if (arg_match(&arg, &large_scale_tile, argi)) {
config->cfg.large_scale_tile = arg_parse_uint(&arg);
if (config->cfg.large_scale_tile) global->codec = get_aom_lst_encoder();
} else if (arg_match(&arg, &monochrome, argi)) {
config->cfg.monochrome = 1;
} else if (arg_match(&arg, &full_still_picture_hdr, argi)) {
@@ -2097,6 +2115,10 @@ int main(int argc, const char **argv_) {
FOREACH_STREAM(stream, streams) {
check_encoder_config(global.disable_warning_prompt, &global,
&stream->config.cfg);
// If large_scale_tile = 1, only support to output to ivf format.
if (stream->config.cfg.large_scale_tile && !stream->config.write_ivf)
die("only support ivf output format while large-scale-tile=1\n");
}
/* Handle non-option arguments */
+2
View File
@@ -191,6 +191,8 @@ list(APPEND AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/temporal_filter.h"
"${AOM_ROOT}/av1/encoder/tokenize.c"
"${AOM_ROOT}/av1/encoder/tokenize.h"
"${AOM_ROOT}/av1/encoder/tpl_model.c"
"${AOM_ROOT}/av1/encoder/tpl_model.h"
"${AOM_ROOT}/av1/encoder/wedge_utils.c"
"${AOM_ROOT}/third_party/fastfeat/fast.c"
"${AOM_ROOT}/third_party/fastfeat/fast.h"
+49
View File
@@ -96,6 +96,7 @@ struct av1_extracfg {
int enable_order_hint; // enable order hint for sequence
int enable_tx64; // enable 64-pt transform usage for sequence
int enable_dist_wtd_comp; // enable dist wtd compound for sequence
int max_reference_frames; // maximum number of references per frame
int enable_ref_frame_mvs; // sequence level
int allow_ref_frame_mvs; // frame level
int enable_masked_comp; // enable masked compound for sequence
@@ -123,6 +124,9 @@ struct av1_extracfg {
unsigned int chroma_subsampling_x;
unsigned int chroma_subsampling_y;
int reduced_tx_type_set;
int use_intra_dct_only;
int use_inter_dct_only;
int quant_b_adapt;
};
static struct av1_extracfg default_extra_cfg = {
@@ -190,6 +194,7 @@ static struct av1_extracfg default_extra_cfg = {
1, // frame order hint
1, // enable 64-pt transform usage
1, // dist-wtd compound
7, // max_reference_frames
1, // enable_ref_frame_mvs sequence level
1, // allow ref_frame_mvs frame level
1, // enable masked compound at sequence level
@@ -216,6 +221,9 @@ static struct av1_extracfg default_extra_cfg = {
0, // chroma_subsampling_x
0, // chroma_subsampling_y
0, // reduced_tx_type_set
0, // use_intra_dct_only
0, // use_inter_dct_only
0, // quant_b_adapt
};
struct aom_codec_alg_priv {
@@ -419,6 +427,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
#endif
}
RANGE_CHECK(extra_cfg, max_reference_frames, 3, 7);
RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1);
RANGE_CHECK_HI(extra_cfg, chroma_subsampling_y, 1);
@@ -571,6 +580,9 @@ static aom_codec_err_t set_encoder_config(
oxcf->qm_minlevel = extra_cfg->qm_min;
oxcf->qm_maxlevel = extra_cfg->qm_max;
oxcf->reduced_tx_type_set = extra_cfg->reduced_tx_type_set;
oxcf->use_intra_dct_only = extra_cfg->use_intra_dct_only;
oxcf->use_inter_dct_only = extra_cfg->use_inter_dct_only;
oxcf->quant_b_adapt = extra_cfg->quant_b_adapt;
#if CONFIG_DIST_8X8
oxcf->using_dist_8x8 = extra_cfg->enable_dist_8x8;
if (extra_cfg->tuning == AOM_TUNE_CDEF_DIST ||
@@ -711,6 +723,11 @@ static aom_codec_err_t set_encoder_config(
oxcf->enable_order_hint = extra_cfg->enable_order_hint;
oxcf->enable_dist_wtd_comp =
extra_cfg->enable_dist_wtd_comp & extra_cfg->enable_order_hint;
oxcf->max_reference_frames = extra_cfg->max_reference_frames;
if (oxcf->max_reference_frames > 3 && oxcf->max_reference_frames < 7) {
// TODO(urvang): Enable all possible values, after they work properly.
oxcf->max_reference_frames = 3;
}
oxcf->enable_masked_comp = extra_cfg->enable_masked_comp;
oxcf->enable_diff_wtd_comp =
extra_cfg->enable_masked_comp & extra_cfg->enable_diff_wtd_comp;
@@ -1109,6 +1126,13 @@ static aom_codec_err_t ctrl_set_enable_dist_wtd_comp(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
// Control handler for AV1E_SET_MAX_REFERENCE_FRAMES: copies the current extra
// configuration, overwrites max_reference_frames with the control argument,
// and returns whatever update_extra_cfg reports for the modified copy.
static aom_codec_err_t ctrl_set_max_reference_frames(aom_codec_alg_priv_t *ctx,
                                                     va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.max_reference_frames = CAST(AV1E_SET_MAX_REFERENCE_FRAMES, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
static aom_codec_err_t ctrl_set_enable_ref_frame_mvs(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1289,6 +1313,27 @@ static aom_codec_err_t ctrl_set_reduced_tx_type_set(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
// Control handler for AV1E_SET_INTRA_DCT_ONLY: copies the current extra
// configuration, overwrites use_intra_dct_only with the control argument, and
// returns whatever update_extra_cfg reports for the modified copy.
static aom_codec_err_t ctrl_set_intra_dct_only(aom_codec_alg_priv_t *ctx,
                                               va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.use_intra_dct_only = CAST(AV1E_SET_INTRA_DCT_ONLY, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
// Control handler for AV1E_SET_INTER_DCT_ONLY: copies the current extra
// configuration, overwrites use_inter_dct_only with the control argument, and
// returns whatever update_extra_cfg reports for the modified copy.
static aom_codec_err_t ctrl_set_inter_dct_only(aom_codec_alg_priv_t *ctx,
                                               va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.use_inter_dct_only = CAST(AV1E_SET_INTER_DCT_ONLY, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
// Control handler for AV1E_SET_QUANT_B_ADAPT: copies the current extra
// configuration, overwrites quant_b_adapt with the control argument, and
// returns whatever update_extra_cfg reports for the modified copy.
static aom_codec_err_t ctrl_set_quant_b_adapt(aom_codec_alg_priv_t *ctx,
                                              va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.quant_b_adapt = CAST(AV1E_SET_QUANT_B_ADAPT, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
static aom_codec_err_t ctrl_set_film_grain_test_vector(
aom_codec_alg_priv_t *ctx, va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1987,6 +2032,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_ENABLE_ORDER_HINT, ctrl_set_enable_order_hint },
{ AV1E_SET_ENABLE_TX64, ctrl_set_enable_tx64 },
{ AV1E_SET_ENABLE_DIST_WTD_COMP, ctrl_set_enable_dist_wtd_comp },
{ AV1E_SET_MAX_REFERENCE_FRAMES, ctrl_set_max_reference_frames },
{ AV1E_SET_ENABLE_REF_FRAME_MVS, ctrl_set_enable_ref_frame_mvs },
{ AV1E_SET_ALLOW_REF_FRAME_MVS, ctrl_set_allow_ref_frame_mvs },
{ AV1E_SET_ENABLE_MASKED_COMP, ctrl_set_enable_masked_comp },
@@ -2008,6 +2054,9 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_ENABLE_ANGLE_DELTA, ctrl_set_enable_angle_delta },
{ AV1E_SET_AQ_MODE, ctrl_set_aq_mode },
{ AV1E_SET_REDUCED_TX_TYPE_SET, ctrl_set_reduced_tx_type_set },
{ AV1E_SET_INTRA_DCT_ONLY, ctrl_set_intra_dct_only },
{ AV1E_SET_INTER_DCT_ONLY, ctrl_set_inter_dct_only },
{ AV1E_SET_QUANT_B_ADAPT, ctrl_set_quant_b_adapt },
{ AV1E_SET_DELTAQ_MODE, ctrl_set_deltaq_mode },
{ AV1E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
{ AV1E_SET_TUNE_CONTENT, ctrl_set_tune_content },
+4 -2
View File
@@ -243,8 +243,10 @@ typedef struct MB_MODE_INFO {
// Joint sign of alpha Cb and alpha Cr
int cfl_alpha_signs;
int compound_idx;
// Indicate if masked compound is used(1) or not(0).
int comp_group_idx;
// If comp_group_idx=0, indicate if dist_wtd_comp(0) or avg_comp(1) is used.
int compound_idx;
#if CONFIG_INSPECTION
int16_t tx_skip[TXK_TYPE_BUF_LEN];
#endif
@@ -596,7 +598,7 @@ typedef struct macroblockd {
uint8_t *tmp_obmc_bufs[2];
} MACROBLOCKD;
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
static INLINE int is_cur_buf_hbd(const MACROBLOCKD *xd) {
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
}
+5 -6
View File
@@ -37,7 +37,7 @@ void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
assert(pred_plane < CFL_PRED_PLANES);
assert(width <= CFL_BUF_LINE);
if (get_bitdepth_data_path_index(xd)) {
if (is_cur_buf_hbd(xd)) {
uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input);
memcpy(xd->cfl.dc_pred_cache[pred_plane], input_16, width << 1);
return;
@@ -69,7 +69,7 @@ void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
assert(pred_plane < CFL_PRED_PLANES);
assert(width <= CFL_BUF_LINE);
assert(height <= CFL_BUF_LINE);
if (get_bitdepth_data_path_index(xd)) {
if (is_cur_buf_hbd(xd)) {
uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride,
width, height);
@@ -196,7 +196,7 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <=
CFL_BUF_SQUARE);
if (get_bitdepth_data_path_index(xd)) {
if (is_cur_buf_hbd(xd)) {
uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
get_predict_hbd_fn(tx_size)(cfl->ac_buf_q3, dst_16, dst_stride, alpha_q3,
xd->bd);
@@ -388,8 +388,7 @@ void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
assert(!((row & 1) && tx_size_high[tx_size] != 4));
sub8x8_adjust_offset(cfl, &row, &col);
}
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size,
get_bitdepth_data_path_index(xd));
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size, is_cur_buf_hbd(xd));
}
void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
@@ -405,5 +404,5 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
tx_size = get_tx_size(width, height);
cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size,
get_bitdepth_data_path_index(xd));
is_cur_buf_hbd(xd));
}
+1 -1
View File
@@ -204,7 +204,7 @@ static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
txfm_param->eob = eob;
txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id];
txfm_param->bd = xd->bd;
txfm_param->is_hbd = get_bitdepth_data_path_index(xd);
txfm_param->is_hbd = is_cur_buf_hbd(xd);
txfm_param->tx_set_type = av1_get_ext_tx_set_type(
txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
}
+2 -2
View File
@@ -378,7 +378,6 @@ typedef struct AV1Common {
int show_frame;
int showable_frame; // frame can be used as show existing frame in future
int show_existing_frame;
int reset_decoder_state;
uint8_t disable_cdf_update;
int allow_high_precision_mv;
@@ -432,6 +431,7 @@ typedef struct AV1Common {
int qm_v;
int min_qmlevel;
int max_qmlevel;
int use_quant_b_adapt;
/* We allocate a MB_MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
@@ -501,7 +501,6 @@ typedef struct AV1Common {
int primary_ref_frame;
int error_resilient_mode;
int force_primary_ref_none;
int tile_cols, tile_rows;
@@ -642,6 +641,7 @@ static INLINE RefCntBuffer *assign_cur_frame_new_fb(AV1_COMMON *const cm) {
if (new_fb_idx == INVALID_IDX) return NULL;
cm->cur_frame = &cm->buffer_pool->frame_bufs[new_fb_idx];
cm->cur_frame->buf.buf_8bit_valid = 0;
return cm->cur_frame;
}
+14 -60
View File
@@ -84,12 +84,11 @@ void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
if (do_warp && xd->cur_frame_force_integer_mv == 0) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const struct buf_2d *const pre_buf = &pd->pre[ref];
av1_warp_plane(&final_warp_params,
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
av1_warp_plane(&final_warp_params, is_cur_buf_hbd(xd), xd->bd,
pre_buf->buf0, pre_buf->width, pre_buf->height,
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
pd->subsampling_x, pd->subsampling_y, conv_params);
} else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
} else if (is_cur_buf_hbd(xd)) {
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
w, h, conv_params, interp_filters, is_intrabc,
xd->bd);
@@ -568,14 +567,15 @@ static void build_masked_compound_no_round(
const int subh = (2 << mi_size_high_log2[sb_type]) == h;
const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (is_cur_buf_hbd(xd)) {
aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
src1_stride, mask, block_size_wide[sb_type],
w, h, subw, subh, conv_params, xd->bd);
else
} else {
aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
src1_stride, mask, block_size_wide[sb_type], w,
h, subw, subh, conv_params);
}
}
void av1_make_masked_inter_predictor(
@@ -800,53 +800,6 @@ void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
return;
}
// State handed to obmc_check_identical_mv while the overlappable neighbours
// of a block are visited.
struct obmc_check_mv_field_ctxt {
// Mode info of the current block; each neighbour is compared against it.
MB_MODE_INFO *current_mi;
// Initialised to 1 by the caller and cleared to 0 as soon as a neighbour
// differs from current_mi.
int mv_field_check_result;
};
// Neighbour-visitor callback: compares one neighbour (nb_mi) with the current
// block stored in the context behind fun_ctxt. The result flag in the context
// is cleared to 0 when the first reference frame, first motion vector or
// interpolation filters differ. Once the flag is 0, later calls do nothing.
// xd, rel_mi_col, nb_mi_width and num_planes are unused.
static INLINE void obmc_check_identical_mv(MACROBLOCKD *xd, int rel_mi_col,
                                           uint8_t nb_mi_width,
                                           MB_MODE_INFO *nb_mi, void *fun_ctxt,
                                           const int num_planes) {
  (void)xd;
  (void)rel_mi_col;
  (void)nb_mi_width;
  (void)num_planes;
  struct obmc_check_mv_field_ctxt *const check =
      (struct obmc_check_mv_field_ctxt *)fun_ctxt;
  const MB_MODE_INFO *const cur_mi = check->current_mi;

  // A mismatch was already recorded; nothing left to decide.
  if (check->mv_field_check_result == 0) return;

  const int identical = nb_mi->ref_frame[0] == cur_mi->ref_frame[0] &&
                        nb_mi->mv[0].as_int == cur_mi->mv[0].as_int &&
                        nb_mi->interp_filters == cur_mi->interp_filters;
  if (!identical) check->mv_field_check_result = 0;
}
// Reports whether every neighbour that OBMC would blend with uses the same
// parameters as the current block. When they all match, OBMC yields the same
// prediction as regular block motion compensation, so the overlapping step
// can be skipped to save work. The parameters compared are the reference
// frame, the motion vector and the interpolation filter.
// Returns 1 when all visited above and left neighbours match, 0 otherwise.
int av1_check_identical_obmc_mv_field(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  // Start from "all identical"; the callback clears the flag on a mismatch.
  struct obmc_check_mv_field_ctxt check;
  check.current_mi = xd->mi[0];
  check.mv_field_check_result = 1;

  foreach_overlappable_nb_above(cm, xd, mi_col,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                obmc_check_identical_mv, &check);
  foreach_overlappable_nb_left(cm, xd, mi_row,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               obmc_check_identical_mv, &check);

  return check.mv_field_check_result;
}
struct obmc_inter_pred_ctxt {
uint8_t **adjacent;
int *adjacent_stride;
@@ -860,7 +813,7 @@ static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
(void)above_mi;
struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
const int is_hbd = is_cur_buf_hbd(xd);
const int overlap =
AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
@@ -897,7 +850,7 @@ static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int overlap =
AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
const int is_hbd = is_cur_buf_hbd(xd);
for (int plane = 0; plane < num_planes; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
@@ -1142,8 +1095,8 @@ static void combine_interintra_highbd(
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
MACROBLOCKD *xd,
BLOCK_SIZE bsize, int plane,
BUFFER_SET *ctx, uint8_t *dst,
int dst_stride) {
const BUFFER_SET *ctx,
uint8_t *dst, int dst_stride) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ssx = xd->plane[plane].subsampling_x;
const int ssy = xd->plane[plane].subsampling_y;
@@ -1166,7 +1119,7 @@ void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
const int ssx = xd->plane[plane].subsampling_x;
const int ssy = xd->plane[plane].subsampling_y;
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
combine_interintra_highbd(
xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
@@ -1185,9 +1138,9 @@ void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
// build interintra_predictors for one plane
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *pred, int stride,
BUFFER_SET *ctx, int plane,
const BUFFER_SET *ctx, int plane,
BLOCK_SIZE bsize) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(
cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
@@ -1206,7 +1159,8 @@ void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *upred, uint8_t *vpred,
int ustride, int vstride,
BUFFER_SET *ctx, BLOCK_SIZE bsize) {
const BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
}
+4 -5
View File
@@ -161,8 +161,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi);
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
const struct macroblockd_plane *pd, int dir);
int av1_check_identical_obmc_mv_field(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col);
static INLINE int is_interinter_compound_used(COMPOUND_TYPE type,
BLOCK_SIZE sb_type) {
@@ -335,17 +333,18 @@ const uint8_t *av1_get_compound_type_mask(
// build interintra_predictors for one plane
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *pred, int stride,
BUFFER_SET *ctx, int plane,
const BUFFER_SET *ctx, int plane,
BLOCK_SIZE bsize);
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *upred, uint8_t *vpred,
int ustride, int vstride,
BUFFER_SET *ctx, BLOCK_SIZE bsize);
const BUFFER_SET *ctx,
BLOCK_SIZE bsize);
void av1_build_intra_predictors_for_interintra(
const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
const BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
const uint8_t *inter_pred, int inter_stride,
+2 -2
View File
@@ -1510,7 +1510,7 @@ void av1_predict_intra_block(
xd->color_index_map_offset[plane != 0];
const uint16_t *const palette =
mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
for (r = 0; r < txhpx; ++r) {
for (c = 0; c < txwpx; ++c) {
@@ -1569,7 +1569,7 @@ void av1_predict_intra_block(
tx_size, row_off, col_off, pd->subsampling_x, pd->subsampling_y);
const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
build_intra_predictors_high(
xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
filter_intra_mode, tx_size, disable_edge_filter,
+54 -17
View File
@@ -64,6 +64,9 @@
#define ACCT_STR __func__
#define AOM_MIN_THREADS_PER_TILE 1
#define AOM_MAX_THREADS_PER_TILE 2
// This is needed by ext_tile related unit tests.
#define EXT_TILE_DEBUG 1
#define MC_TEMP_BUF_PELS \
@@ -734,7 +737,7 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm,
&scaled_mv, &subpel_x_mv, &subpel_y_mv);
pre = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
src_stride = pre_buf->stride;
highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
highbd = is_cur_buf_hbd(xd);
extend_mc_border(sf, pre_buf, scaled_mv, block, subpel_x_mv,
subpel_y_mv, 0, is_intrabc, highbd, xd->mc_buf[ref],
&pre, &src_stride);
@@ -780,7 +783,7 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm,
&scaled_mv, &subpel_x_mv, &subpel_y_mv);
pre[ref] = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
src_stride[ref] = pre_buf->stride;
highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
highbd = is_cur_buf_hbd(xd);
WarpTypesAllowed warp_types;
warp_types.global_warp_allowed = is_global[ref];
@@ -853,7 +856,7 @@ static void dec_build_inter_predictors_for_planes(const AV1_COMMON *cm,
static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
MACROBLOCKD *xd, int mi_row,
int mi_col, BUFFER_SET *ctx,
int mi_col, const BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
@@ -868,7 +871,7 @@ static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
static void dec_build_inter_predictors_sbuv(const AV1_COMMON *cm,
MACROBLOCKD *xd, int mi_row,
int mi_col, BUFFER_SET *ctx,
int mi_col, const BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
MAX_MB_PLANE - 1);
@@ -1013,7 +1016,7 @@ static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm,
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
dst_buf1[1] =
@@ -3361,6 +3364,20 @@ static int tile_worker_hook(void *arg1, void *arg2) {
return !td->xd.corrupted;
}
// Returns the maximum number of row-MT workers to assign to one tile:
// AOM_MIN_THREADS_PER_TILE when the tile has a single superblock row,
// AOM_MAX_THREADS_PER_TILE otherwise.
static INLINE int get_max_row_mt_workers_per_tile(AV1_COMMON *cm,
                                                  TileInfo tile) {
  // NOTE: The cap is derived from the split between parse time and decode
  // time. By the theoretical estimate, when the two take an equal share of
  // the time, parsing plus decoding a tile cannot use more than 2 workers.
  // TODO(any): Modify this value if parsing is optimized in future.
  if (av1_get_sb_rows_in_tile(cm, tile) == 1) {
    return AOM_MIN_THREADS_PER_TILE;
  }
  return AOM_MAX_THREADS_PER_TILE;
}
// The caller must hold pbi->row_mt_mutex_ when calling this function.
// Returns 1 if either the next job is stored in *next_job_info or 1 is stored
// in *end_of_frame.
@@ -3391,8 +3408,8 @@ static int get_next_job_info(AV1Decoder *const pbi,
int min_threads_working = INT_MAX;
int max_mis_to_decode = 0;
int tile_row_idx, tile_col_idx;
int tile_row = 0;
int tile_col = 0;
int tile_row = -1;
int tile_col = -1;
memset(next_job_info, 0, sizeof(*next_job_info));
@@ -3440,7 +3457,9 @@ static int get_next_job_info(AV1Decoder *const pbi,
max_mis_to_decode = 0;
}
if (num_threads_working == min_threads_working &&
num_mis_to_decode > max_mis_to_decode) {
num_mis_to_decode > max_mis_to_decode &&
num_threads_working <
get_max_row_mt_workers_per_tile(cm, tile_data->tile_info)) {
max_mis_to_decode = num_mis_to_decode;
tile_row = tile_row_idx;
tile_col = tile_col_idx;
@@ -3448,6 +3467,8 @@ static int get_next_job_info(AV1Decoder *const pbi,
}
}
}
// No job found to process
if (tile_row == -1 || tile_col == -1) return 0;
tile_data = pbi->tile_data + tile_row * cm->tile_cols + tile_col;
tile_info = tile_data->tile_info;
@@ -3576,9 +3597,22 @@ static int row_mt_worker_hook(void *arg1, void *arg2) {
TileDataDec *const tile_data = cur_job_info->tile_data;
tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data,
allow_update_cdf);
#if CONFIG_MULTITHREAD
pthread_mutex_lock(pbi->row_mt_mutex_);
#endif
tile_data->dec_row_mt_sync.num_threads_working++;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(pbi->row_mt_mutex_);
#endif
// decode tile
parse_tile_row_mt(pbi, td, tile_data);
#if CONFIG_MULTITHREAD
pthread_mutex_lock(pbi->row_mt_mutex_);
#endif
tile_data->dec_row_mt_sync.num_threads_working--;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(pbi->row_mt_mutex_);
#endif
} else {
break;
}
@@ -4055,7 +4089,8 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
int tile_cols_start;
int tile_cols_end;
int tile_count_tg;
int num_workers;
int num_workers = 0;
int max_threads;
const uint8_t *raw_data_end = NULL;
int max_sb_rows = 0;
@@ -4071,7 +4106,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
tile_cols_end = tile_cols;
}
tile_count_tg = end_tile - start_tile + 1;
num_workers = pbi->max_threads;
max_threads = pbi->max_threads;
// No tiles to decode.
if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start ||
@@ -4084,7 +4119,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
assert(tile_rows <= MAX_TILE_ROWS);
assert(tile_cols <= MAX_TILE_COLS);
assert(tile_count_tg > 0);
assert(num_workers > 0);
assert(max_threads > 0);
assert(start_tile <= end_tile);
assert(start_tile >= 0 && end_tile < n_tiles);
@@ -4116,8 +4151,10 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
max_sb_rows = AOMMAX(max_sb_rows,
av1_get_sb_rows_in_tile(cm, tile_data->tile_info));
num_workers += get_max_row_mt_workers_per_tile(cm, tile_data->tile_info);
}
}
num_workers = AOMMIN(num_workers, max_threads);
if (pbi->allocated_row_mt_sync_rows != max_sb_rows) {
for (int i = 0; i < n_tiles; ++i) {
@@ -4817,7 +4854,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
cm->error_resilient_mode = 1;
} else {
cm->show_existing_frame = aom_rb_read_bit(rb);
cm->reset_decoder_state = 0;
pbi->reset_decoder_state = 0;
if (cm->show_existing_frame) {
if (pbi->sequence_header_changed) {
@@ -4859,7 +4896,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// assign_frame_buffer_p()!
assert(!cm->cur_frame->raw_frame_buffer.data);
assign_frame_buffer_p(&cm->cur_frame, frame_to_show);
cm->reset_decoder_state = frame_to_show->frame_type == KEY_FRAME;
pbi->reset_decoder_state = frame_to_show->frame_type == KEY_FRAME;
unlock_buffer_pool(pool);
cm->lf.filter_level[0] = 0;
@@ -4869,11 +4906,11 @@ static int read_uncompressed_header(AV1Decoder *pbi,
if (!frame_to_show->showable_frame) {
aom_merge_corrupted_flag(&xd->corrupted, 1);
}
if (cm->reset_decoder_state) frame_to_show->showable_frame = 0;
if (pbi->reset_decoder_state) frame_to_show->showable_frame = 0;
cm->film_grain_params = frame_to_show->film_grain_params;
if (cm->reset_decoder_state) {
if (pbi->reset_decoder_state) {
show_existing_frame_reset(pbi, existing_frame_idx);
} else {
current_frame->refresh_frame_flags = 0;
@@ -5471,7 +5508,7 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
if (cm->show_existing_frame) {
// showing a frame directly
*p_data_end = data + uncomp_hdr_size;
if (cm->reset_decoder_state) {
if (pbi->reset_decoder_state) {
// Use the default frame context values.
*cm->fc = *cm->default_frame_context;
if (!cm->fc->initialized)
+2 -1
View File
@@ -699,7 +699,8 @@ static void read_intrabc_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
mi_col, bsize, r);
if (!valid_dv) {
// Intra bc motion vectors are not valid - signal corrupt frame
aom_merge_corrupted_flag(&xd->corrupted, 1);
aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
"Invalid intrabc dv");
}
}
}
+2 -2
View File
@@ -361,7 +361,7 @@ static void swap_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
assert(IMPLIES(!pbi->hold_ref_buf,
cm->current_frame.refresh_frame_flags == 0));
assert(IMPLIES(!pbi->hold_ref_buf,
cm->show_existing_frame && !cm->reset_decoder_state));
cm->show_existing_frame && !pbi->reset_decoder_state));
// The following two for loops need to release the reference stored in
// cm->ref_frame_map[ref_index] before transferring the reference stored
@@ -374,7 +374,7 @@ static void swap_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
}
const int check_on_show_existing_frame =
!cm->show_existing_frame || cm->reset_decoder_state;
!cm->show_existing_frame || pbi->reset_decoder_state;
for (; ref_index < REF_FRAMES && check_on_show_existing_frame;
++ref_index) {
decrease_ref_count(cm->ref_frame_map[ref_index], pool);
+1
View File
@@ -200,6 +200,7 @@ typedef struct AV1Decoder {
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // Boolean: whether we are holding reference buffers in
// common.next_ref_frame_map.
int reset_decoder_state;
int tile_size_bytes;
int tile_col_size_bytes;
+7 -4
View File
@@ -155,9 +155,6 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
return 0;
}
memset(levels_buf, 0,
sizeof(*levels_buf) *
((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
if (plane == AOM_PLANE_Y) {
// only y plane's tx_type is transmitted
av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r);
@@ -241,6 +238,12 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
}
*eob = rec_eob_pos(eob_pt, eob_extra);
if (*eob > 1) {
memset(levels_buf, 0,
sizeof(*levels_buf) *
((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
}
{
// Read the non-zero coefficient with scan index eob-1
// TODO(angiebird): Put this into a function
@@ -252,7 +255,7 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx];
int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + 1;
if (level > NUM_BASE_LEVELS) {
const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
const int br_ctx = get_br_ctx_eob(pos, bwl, tx_class);
cdf = ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx];
for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+2 -2
View File
@@ -121,7 +121,7 @@ int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
for (i = 0; i < bh; i += 4) {
for (j = 0; j < bw; j += 4) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
var +=
log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
@@ -153,7 +153,7 @@ static unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
uint8_t *buf = x->plane[0].src.buf;
const int bw = MI_SIZE * mi_size_wide[bs];
const int bh = MI_SIZE * mi_size_high[bs];
int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
const int hbd = is_cur_buf_hbd(xd);
int var = 0;
for (int r = 0; r < bh; r += 8)
+147 -84
View File
@@ -41,47 +41,37 @@ static void quantize_fp_helper_c(
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, int log_scale) {
int i, eob = -1;
const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
(void)quant_shift_ptr;
(void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (qm_ptr == NULL && iqm_ptr == NULL) {
const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
{ // rc == 0
const int coeff = coeff_ptr[0];
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if ((abs_coeff << (1 + log_scale)) >= (int32_t)(dequant_ptr[0])) {
abs_coeff = clamp64(abs_coeff + rounding0, INT16_MIN, INT16_MAX);
const int tmp32 = (int)((abs_coeff * quant_ptr[0]) >> (16 - log_scale));
int tmp32 = 0;
if ((abs_coeff << (1 + log_scale)) >= thresh) {
abs_coeff =
clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
if (tmp32) {
qcoeff_ptr[0] = (tmp32 ^ coeff_sign) - coeff_sign;
const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[0]) >> log_scale;
dqcoeff_ptr[0] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
eob = 0;
}
}
}
const int rounding1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
const int32_t thresh1 = (int32_t)(dequant_ptr[1]);
for (i = 1; i < n_coeffs; i++) {
const int coeff = coeff_ptr[i];
const int coeff_sign = (coeff >> 31);
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if ((abs_coeff << (1 + log_scale)) >= thresh1) {
abs_coeff = clamp64(abs_coeff + rounding1, INT16_MIN, INT16_MAX);
const int tmp32 = (int)((abs_coeff * quant_ptr[1]) >> (16 - log_scale));
if (tmp32) {
qcoeff_ptr[i] = (tmp32 ^ coeff_sign) - coeff_sign;
const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[1]) >> log_scale;
dqcoeff_ptr[i] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
eob = AOMMAX(iscan[i], eob);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
const tran_low_t abs_dqcoeff =
(tmp32 * dequant_ptr[rc != 0]) >> log_scale;
dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
}
}
if (tmp32) eob = i;
}
} else {
// Quantization pass: All coefficients with index >= zero_flag are
@@ -99,7 +89,7 @@ static void quantize_fp_helper_c(
int tmp32 = 0;
if (abs_coeff * wt >=
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
abs_coeff += rounding[rc != 0];
abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
(16 - log_scale + AOM_QM_BITS));
@@ -275,32 +265,65 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (qm_ptr != NULL && iqm_ptr != NULL) {
quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
if (qparam->use_quant_b_adapt) {
// TODO(sarahparker) These quantize_b optimizations need SIMD
// implementations
if (qm_ptr != NULL && iqm_ptr != NULL) {
quantize_b_adaptive_helper_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
switch (qparam->log_scale) {
case 0:
aom_quantize_b_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
case 1:
aom_quantize_b_32x32_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
case 2:
aom_quantize_b_64x64_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
default: assert(0);
}
}
} else {
switch (qparam->log_scale) {
case 0:
aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
case 1:
aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
case 2:
aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
default: assert(0);
if (qm_ptr != NULL && iqm_ptr != NULL) {
quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
switch (qparam->log_scale) {
case 0:
aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
case 1:
aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
case 2:
aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
break;
default: assert(0);
}
}
}
}
@@ -391,41 +414,81 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
const QUANT_PARAM *qparam) {
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (qm_ptr != NULL && iqm_ptr != NULL) {
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
if (qparam->use_quant_b_adapt) {
if (qm_ptr != NULL && iqm_ptr != NULL) {
highbd_quantize_b_adaptive_helper_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
switch (qparam->log_scale) {
case 0:
if (LIKELY(n_coeffs >= 8)) {
aom_highbd_quantize_b_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
} else {
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
// quantization
aom_highbd_quantize_b_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
}
break;
case 1:
aom_highbd_quantize_b_32x32_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
case 2:
aom_highbd_quantize_b_64x64_adaptive_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
default: assert(0);
}
}
} else {
switch (qparam->log_scale) {
case 0:
if (LIKELY(n_coeffs >= 8)) {
aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan);
} else {
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
// quantization
aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX,
if (qm_ptr != NULL && iqm_ptr != NULL) {
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
switch (qparam->log_scale) {
case 0:
if (LIKELY(n_coeffs >= 8)) {
aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX,
p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
}
break;
case 1:
aom_highbd_quantize_b_32x32(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
case 2:
aom_highbd_quantize_b_64x64(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
default: assert(0);
} else {
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
// quantization
aom_highbd_quantize_b_c(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
}
break;
case 1:
aom_highbd_quantize_b_32x32(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
case 2:
aom_highbd_quantize_b_64x64(
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
eob_ptr, sc->scan, sc->iscan);
break;
default: assert(0);
}
}
}
}
+1
View File
@@ -27,6 +27,7 @@ typedef struct QUANT_PARAM {
TX_SIZE tx_size;
const qm_val_t *qmatrix;
const qm_val_t *iqmatrix;
int use_quant_b_adapt;
} QUANT_PARAM;
typedef void (*AV1_QUANT_FACADE)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+3 -7
View File
@@ -54,10 +54,10 @@ typedef struct macroblock_plane {
typedef struct {
int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
int base_cost[SIG_COEF_CONTEXTS][4];
int base_cost[SIG_COEF_CONTEXTS][8];
int eob_extra_cost[EOB_COEF_CONTEXTS][2];
int dc_sign_cost[DC_SIGN_CONTEXTS][2];
int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
} LV_MAP_COEFF_COST;
typedef struct {
@@ -192,16 +192,14 @@ typedef struct {
int32_t rate[COMPOUND_TYPES];
int64_t dist[COMPOUND_TYPES];
int_mv mv[2];
int8_t ref_frames[2];
MV_REFERENCE_FRAME ref_frames[2];
PREDICTION_MODE mode;
InterpFilters filter;
int ref_mv_idx;
int is_global[2];
} COMP_RD_STATS;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
struct inter_modes_info;
#endif
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
@@ -300,9 +298,7 @@ struct macroblock {
// to the accurate tile context.
FRAME_CONTEXT *tile_pb_ctx;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
struct inter_modes_info *inter_modes_info;
#endif
// buffer for hash value calculation of a block
// used only in av1_get_block_hash_value()
+4
View File
@@ -30,6 +30,10 @@ extern const uint16_t av1_prob_cost[128];
// Calculate the cost of a symbol with probability p15 / 2^15
static INLINE int av1_cost_symbol(aom_cdf_prob p15) {
// p15 can be out of range [1, CDF_PROB_TOP - 1]. Clamping it, so that the
// following cost calculation works correctly. Otherwise, if p15 =
// CDF_PROB_TOP, shift would be -1, and "p15 << shift" would be wrong.
p15 = (aom_cdf_prob)clamp(p15, 1, CDF_PROB_TOP - 1);
assert(0 < p15 && p15 < CDF_PROB_TOP);
const int shift = CDF_PROB_BITS - 1 - get_msb(p15);
const int prob = get_prob(p15 << shift, CDF_PROB_TOP);
+772 -13
View File
@@ -11,12 +11,244 @@
#include <stdint.h>
#include "config/aom_config.h"
#include "config/aom_scale_rtcd.h"
#include "aom/aom_codec.h"
#include "aom/aom_encoder.h"
#include "aom_ports/system_state.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif // CONFIG_MISMATCH_DEBUG
#include "av1/common/onyxc_int.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encode_strategy.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/temporal_filter.h"
#include "av1/encoder/tpl_model.h"
// Stores the five per-reference refresh flags on |cpi|, in the order
// LAST, GOLDEN, BWDREF, ALTREF2, ALTREF.
static void assign_buffer_refresh_flags(AV1_COMP *cpi, int last, int golden,
                                        int bwd_ref, int alt2_ref,
                                        int alt_ref) {
  cpi->refresh_last_frame = last;
  cpi->refresh_golden_frame = golden;
  cpi->refresh_bwd_ref_frame = bwd_ref;
  cpi->refresh_alt2_ref_frame = alt2_ref;
  cpi->refresh_alt_ref_frame = alt_ref;
}

// Define the reference buffers that will be updated post encode.
//
// For the given frame update |type| this sets the cpi->refresh_*_frame flags
// and the matching cpi->rc.is_* source-type flags. An unknown |type| trips an
// assert and leaves the refresh flags as they were.
void av1_configure_buffer_updates(AV1_COMP *cpi, const FRAME_UPDATE_TYPE type) {
  RATE_CONTROL *const rc = &cpi->rc;

  // NOTE(weitinglin): Should we define another function to take care of
  // cpi->rc.is_$Source_Type to make this function as it is in the comment?

  // show_existing_frame is left set from the end of encoding the previous
  // frame, and is_src_frame_alt_ref may have been left set with it, so only
  // clear the latter when show_existing_frame is off.
  if (!cpi->common.show_existing_frame) rc->is_src_frame_alt_ref = 0;
  rc->is_bwd_ref_frame = 0;
  rc->is_last_bipred_frame = 0;
  rc->is_bipred_frame = 0;
  rc->is_src_frame_ext_arf = 0;

  switch (type) {
    case KF_UPDATE:
      // Every reference buffer is refreshed.
      assign_buffer_refresh_flags(cpi, 1, 1, 1, 1, 1);
      break;

    case LF_UPDATE: assign_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0); break;

    case GF_UPDATE:
      // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
      // needed.
      assign_buffer_refresh_flags(cpi, 1, 1, 0, 0, 0);
      break;

    case OVERLAY_UPDATE:
      assign_buffer_refresh_flags(cpi, 0, 1, 0, 0, 0);
      rc->is_src_frame_alt_ref = 1;
      break;

    case ARF_UPDATE:
      // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
      assign_buffer_refresh_flags(cpi, 0, 0, 0, 0, 1);
      break;

    case BRF_UPDATE:
      assign_buffer_refresh_flags(cpi, 0, 0, 1, 0, 0);
      rc->is_bwd_ref_frame = 1;
      break;

    case LAST_BIPRED_UPDATE:
      assign_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rc->is_last_bipred_frame = 1;
      break;

    case BIPRED_UPDATE:
      assign_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rc->is_bipred_frame = 1;
      break;

    case INTNL_OVERLAY_UPDATE:
      assign_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rc->is_src_frame_alt_ref = 1;
      rc->is_src_frame_ext_arf = 1;
      break;

    case INTNL_ARF_UPDATE: {
      // With the new bwdref update rule in two-pass mode the internal ARF
      // goes into the BWDREF slot; otherwise it goes into ALTREF2.
      const int use_bwd_slot =
          cpi->new_bwdref_update_rule == 1 && cpi->oxcf.pass == 2;
      assign_buffer_refresh_flags(cpi, 0, 0, use_bwd_slot, !use_bwd_slot, 0);
      break;
    }

    default: assert(0); break;
  }
}
// ORs into |*frame_flags| the flags implied by the frame state in |cm|:
// intra-only, switch (S-) frame and error-resilient mode. Bits that are
// already set in |*frame_flags| are kept.
static void set_additional_frame_flags(const AV1_COMMON *const cm,
                                       unsigned int *const frame_flags) {
  unsigned int extra_flags = 0;

  if (frame_is_intra_only(cm)) extra_flags |= FRAMEFLAGS_INTRAONLY;
  if (frame_is_sframe(cm)) extra_flags |= FRAMEFLAGS_SWITCH;
  if (cm->error_resilient_mode) extra_flags |= FRAMEFLAGS_ERROR_RESILIENT;

  *frame_flags |= extra_flags;
}
// Advances the keyframe distance counters (frames_since_key up,
// frames_to_key down) for the frame that was just processed.
static INLINE void update_keyframe_counters(AV1_COMP *cpi) {
  const AV1_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;

  // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
  //               differently here for rc->avg_frame_bandwidth.
  if (!cm->show_frame && !rc->is_bwd_ref_frame) return;

  // A show_existing_frame whose source is not the altref and which is not a
  // key frame was already counted when it was originally encoded, so it must
  // not be counted a second time.
  if (cm->show_existing_frame && !rc->is_src_frame_alt_ref &&
      cm->current_frame.frame_type != KEY_FRAME) {
    return;
  }

  rc->frames_since_key++;
  rc->frames_to_key--;
}
// Returns 1 when none of the five refresh flags on |cpi| is set, i.e. the
// frame will not update any reference buffer; returns 0 otherwise.
static INLINE int is_frame_droppable(const AV1_COMP *const cpi) {
  const int refreshes_any_buffer =
      cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
      cpi->refresh_bwd_ref_frame || cpi->refresh_golden_frame ||
      cpi->refresh_last_frame;
  return !refreshes_any_buffer;
}
// Counts down rc.frames_till_gf_update_due (never below zero) for frames that
// are shown or that are droppable.
static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
  // TODO(weitinglin): Updating this counter for is_frame_droppable
  // is a work-around to handle the condition when a frame is dropped.
  // We should fix the cpi->common.show_frame flag
  // instead of checking the other condition to update the counter properly.
  if (!cpi->common.show_frame && !is_frame_droppable(cpi)) return;

  // Decrement the countdown to the next golden-frame update.
  RATE_CONTROL *const rc = &cpi->rc;
  if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
}
// Moves the two-pass GF group index on to the next frame, unless this frame's
// slot was already consumed.
static INLINE void update_twopass_gf_group_index(AV1_COMP *cpi) {
  const AV1_COMMON *const cm = &cpi->common;

  // A show_existing_frame whose source is not the altref and which is not a
  // key frame had its index incremented when it was originally encoded.
  const int already_advanced = cm->show_existing_frame &&
                               !cpi->rc.is_src_frame_alt_ref &&
                               cm->current_frame.frame_type != KEY_FRAME;
  if (already_advanced) return;

  ++cpi->twopass.gf_group.index;
}
// Runs the per-frame counter updates: keyframe counters, the golden-frame
// countdown and, in the second pass only, the GF group index.
static void update_rc_counts(AV1_COMP *cpi) {
  update_keyframe_counters(cpi);
  update_frames_till_gf_update(cpi);

  if (cpi->oxcf.pass != 2) return;
  update_twopass_gf_group_index(cpi);
}
// Decides whether the next frame should be emitted as a show_existing_frame,
// and if so records which frame buffer to show in
// cpi->existing_fb_idx_to_show. Always clears rc.is_src_frame_ext_arf on exit.
static void check_show_existing_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  const FRAME_UPDATE_TYPE upcoming_update_type =
      gf_group->update_type[gf_group->index];

  // Under the new bwdref update rule the ARF index collapses to 0 or 1.
  int which_arf = gf_group->arf_update_idx[gf_group->index];
  if (cpi->new_bwdref_update_rule == 1) which_arf = which_arf > 0;

  if (cm->show_existing_frame == 1) {
    // The previous frame was a show_existing_frame; drop the flag.
    cm->show_existing_frame = 0;
  } else if (rc->is_last_bipred_frame) {
    // NOTE: When new structure is used, every bwdref will have one overlay
    //       frame. Therefore, there is no need to find out which frame to
    //       show in advance.
    if (cpi->new_bwdref_update_rule == 0) {
      // NOTE: After a last bi-predictive frame the BWDREF_FRAME has to be
      //       shown next; following the reference buffer update it is the
      //       buffer referenced by remapped_ref_idx[0].
      rc->is_last_bipred_frame = 0;
      cm->show_existing_frame = 1;
      cpi->existing_fb_idx_to_show = cm->remapped_ref_idx[0];
    }
  } else if (cpi->is_arf_filter_off[which_arf] &&
             (upcoming_update_type == OVERLAY_UPDATE ||
              upcoming_update_type == INTNL_OVERLAY_UPDATE)) {
    // Other parameters related to OVERLAY_UPDATE will be taken care of
    // in av1_rc_get_second_pass_params(cpi)
    cm->show_existing_frame = 1;
    rc->is_src_frame_alt_ref = 1;

    // An overlay shows ALTREF; an internal overlay shows BWDREF under the new
    // bwdref update rule and ALTREF2 otherwise.
    int ref_to_show;
    if (upcoming_update_type == OVERLAY_UPDATE) {
      ref_to_show = ALTREF_FRAME;
    } else if (cpi->new_bwdref_update_rule == 1) {
      ref_to_show = BWDREF_FRAME;
    } else {
      ref_to_show = ALTREF2_FRAME;
    }
    cpi->existing_fb_idx_to_show = get_ref_frame_map_idx(cm, ref_to_show);

    if (cpi->new_bwdref_update_rule == 0) {
      cpi->is_arf_filter_off[which_arf] = 0;
    }
  }

  rc->is_src_frame_ext_arf = 0;
}
static void set_ext_overrides(AV1_COMP *const cpi,
EncodeFrameParams *const frame_params) {
@@ -28,9 +260,8 @@ static void set_ext_overrides(AV1_COMP *const cpi,
AV1_COMMON *const cm = &cpi->common;
if (cpi->ext_use_s_frame) {
cm->current_frame.frame_type = S_FRAME;
frame_params->frame_type = S_FRAME;
}
cm->force_primary_ref_none = cpi->ext_use_primary_ref_none;
if (cpi->ext_refresh_frame_context_pending) {
cm->refresh_frame_context = cpi->ext_refresh_frame_context;
@@ -50,10 +281,9 @@ static void set_ext_overrides(AV1_COMP *const cpi,
// A keyframe is already error resilient and keyframes with
// error_resilient_mode interferes with the use of show_existing_frame
// when forward reference keyframes are enabled.
frame_params->error_resilient_mode &=
cm->current_frame.frame_type != KEY_FRAME;
frame_params->error_resilient_mode &= frame_params->frame_type != KEY_FRAME;
// For bitstream conformance, s-frames must be error-resilient
frame_params->error_resilient_mode |= frame_is_sframe(cm);
frame_params->error_resilient_mode |= frame_params->frame_type == S_FRAME;
}
static int get_ref_frame_flags(const AV1_COMP *const cpi) {
@@ -130,10 +360,474 @@ static int get_ref_frame_flags(const AV1_COMP *const cpi) {
return flags;
}
// Maps the current frame to a reference "type", used when choosing the
// primary_ref_frame (the most recent reference buffer of the same type).
//
// The type comes from the flags saying which reference buffers this frame
// refreshes. Several flags may be set at once, so the order of the checks
// below is an order of precedence.
static int get_current_frame_ref_type(
    const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params) {
  // Intra-only frames, error-resilient frames and frames with an externally
  // forced "no primary ref" all count as regular frames.
  if (frame_params->frame_type == KEY_FRAME ||
      frame_params->frame_type == INTRA_ONLY_FRAME) {
    return REGULAR_FRAME;
  }
  if (frame_params->error_resilient_mode || cpi->ext_use_primary_ref_none) {
    return REGULAR_FRAME;
  }

  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
    return EXT_ARF_FRAME;
  }
  if (cpi->refresh_alt_ref_frame) return ARF_FRAME;
  if (cpi->rc.is_src_frame_alt_ref) return OVERLAY_FRAME;
  if (cpi->refresh_golden_frame) return GLD_FRAME;
  if (cpi->refresh_bwd_ref_frame) return BRF_FRAME;

  return REGULAR_FRAME;
}
// Picks the primary reference frame for the current frame.
//
// Returns PRIMARY_REF_NONE for intra-only frames, error-resilient frames, or
// when "no primary ref" is forced externally. Otherwise returns the
// LAST_FRAME-relative index of the reference whose buffer is the one recorded
// for this frame's reference type, or PRIMARY_REF_NONE if no reference maps
// to that buffer.
static int choose_primary_ref_frame(
    const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params) {
  if (frame_params->frame_type == KEY_FRAME ||
      frame_params->frame_type == INTRA_ONLY_FRAME ||
      frame_params->error_resilient_mode || cpi->ext_use_primary_ref_none) {
    return PRIMARY_REF_NONE;
  }

  const AV1_COMMON *const cm = &cpi->common;

  // Buffer last associated with the same reference type as this frame.
  const FRAME_CONTEXT_INDEX current_ref_type =
      get_current_frame_ref_type(cpi, frame_params);
  const int target_fb = cpi->fb_of_context_type[current_ref_type];

  // Scan every reference; when several map to the target buffer the last one
  // in LAST_FRAME..ALTREF_FRAME order is kept.
  int chosen = PRIMARY_REF_NONE;
  for (int slot = 0; slot <= ALTREF_FRAME - LAST_FRAME; ++slot) {
    if (get_ref_frame_map_idx(cm, LAST_FRAME + slot) == target_fb) {
      chosen = slot;
    }
  }
  return chosen;
}
// Updates |fb_of_context_type|, the table mapping each reference type to a
// frame buffer index, after a frame has been encoded.
static void update_fb_of_context_type(
    const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params,
    int *const fb_of_context_type) {
  const AV1_COMMON *const cm = &cpi->common;

  // Intra-only or error-resilient frames, or an externally forced
  // "no primary ref", invalidate every entry and re-seed REGULAR_FRAME.
  const int reset_all_entries = frame_is_intra_only(cm) ||
                                cm->error_resilient_mode ||
                                cpi->ext_use_primary_ref_none;
  if (reset_all_entries) {
    for (int type = 0; type < REF_FRAMES; ++type) {
      fb_of_context_type[type] = -1;
    }
    const int seed_ref = cm->show_frame ? GOLDEN_FRAME : ALTREF_FRAME;
    fb_of_context_type[REGULAR_FRAME] = get_ref_frame_map_idx(cm, seed_ref);
  }

  // Nothing else to refresh for a show_existing_frame.
  if (encode_show_existing_frame(cm)) return;

  // Refresh fb_of_context_type[]: see encoder.h for explanation.
  // The refresh_frame_flags wanted here are those of the frame that just
  // happened. Calling get_refresh_frame_flags now would give a different
  // answer, because update_reference_frames() has already run.
  if (cm->current_frame.frame_type == KEY_FRAME) {
    // All ref frames are refreshed, pick one that will live long enough
    fb_of_context_type[REGULAR_FRAME] = 0;
    return;
  }

  // If more than one buffer was refreshed it does not matter which is
  // recorded, so take the lowest-numbered one. LST sometimes refreshes none,
  // which is fine: the entry is then left unchanged.
  const int current_frame_ref_type =
      get_current_frame_ref_type(cpi, frame_params);
  const int refreshed = cm->current_frame.refresh_frame_flags;
  int fb = 0;
  while (fb < REF_FRAMES && !(refreshed & (1 << fb))) ++fb;
  if (fb < REF_FRAMES) fb_of_context_type[current_frame_ref_type] = fb;
}
// Returns the order offset for the frame about to be coded: 0 for shown and
// show_existing frames, otherwise the sum of the GF group's ARF and BRF source
// offsets, capped at MAX_GF_INTERVAL - 1.
static int get_order_offset(const AV1_COMP *const cpi,
                            const EncodeFrameParams *const frame_params) {
  // A shown frame has order offset 0 by definition. A show_existing_frame
  // ignores the offset and takes its order_hint from the frame being shown.
  if (frame_params->show_frame || cpi->common.show_existing_frame) return 0;

  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  const int gf_index = gf_group->index;

  // The ARF offset is capped on its own before the sum is capped as well.
  const int arf_offset =
      AOMMIN((MAX_GF_INTERVAL - 1), gf_group->arf_src_offset[gf_index]);
  const int brf_offset = gf_group->brf_src_offset[gf_index];
  const int total_offset = arf_offset + brf_offset;

  return AOMMIN((MAX_GF_INTERVAL - 1), total_offset);
}
// Re-estimates the encoder frame rate from the timestamps of |source| and
// then records those timestamps as the most recently seen ones.
static void adjust_frame_rate(AV1_COMP *cpi,
                              const struct lookahead_entry *source) {
  int64_t this_duration;
  int step = 0;

  // Clear down mmx registers
  aom_clear_system_state();

  if (source->ts_start != cpi->first_time_stamp_ever) {
    const int64_t last_duration =
        cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;

    this_duration = source->ts_end - cpi->last_end_time_stamp_seen;

    // Request a step update when the duration changed by 10% or more
    // (skipped when the previous duration is zero).
    if (last_duration) {
      step = (int)((this_duration - last_duration) * 10 / last_duration);
    }
  } else {
    // First frame ever seen: use its own duration and always step.
    this_duration = source->ts_end - source->ts_start;
    step = 1;
  }

  if (this_duration && step) {
    // Step update: jump straight to the rate implied by this frame.
    av1_new_framerate(cpi, 10000000.0 / this_duration);
  } else if (this_duration) {
    // Average this frame's rate into the last second's average
    // frame rate. If we haven't seen 1 second yet, then average
    // over the whole interval seen.
    const double interval = AOMMIN(
        (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
    double avg_duration = 10000000.0 / cpi->framerate;
    avg_duration *= (interval - avg_duration + this_duration);
    avg_duration /= interval;

    av1_new_framerate(cpi, 10000000.0 / avg_duration);
  }

  cpi->last_time_stamp_seen = source->ts_start;
  cpi->last_end_time_stamp_seen = source->ts_end;
}
// Decides whether `source` is to be coded as an ARF overlay frame and, if so,
// sets up the LAST-frame refresh accordingly.
static void check_src_altref(AV1_COMP *cpi,
                             const struct lookahead_entry *source) {
  RATE_CONTROL *const rc = &cpi->rc;

  if (cpi->oxcf.pass != 2) {
    // NOTE(review): is_src_frame_ext_arf is not written on this path, so the
    // check further down sees whatever value it already held - confirm that
    // is intended.
    rc->is_src_frame_alt_ref =
        cpi->alt_ref_source && (source == cpi->alt_ref_source);
  } else {
    // If pass == 2, the parameters set here will be reset in
    // av1_rc_get_second_pass_params()
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const int update_type = gf_group->update_type[gf_group->index];
    rc->is_src_frame_alt_ref = (update_type == INTNL_OVERLAY_UPDATE) ||
                               (update_type == OVERLAY_UPDATE);
    rc->is_src_frame_ext_arf = update_type == INTNL_OVERLAY_UPDATE;
  }

  if (!rc->is_src_frame_alt_ref) return;

  // Current frame is an ARF overlay frame.
  cpi->alt_ref_source = NULL;

  const int intnl_overlay_coded =
      rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame;
  if (intnl_overlay_coded) {
    // For INTNL_OVERLAY, when show_existing_frame == 0, they do need to
    // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3,
    // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST.
    cpi->refresh_last_frame = 1;
  } else {
    // Don't refresh the last buffer for an ARF overlay frame. It will
    // become the GF so preserve last as an alternative prediction option.
    cpi->refresh_last_frame = 0;
  }
}
// Returns the lookahead offset of the source frame to be used for an ARF, or
// 0 when the current frame is not an alt ref (or alt refs are disabled).
// In pass 2 the offset comes from the GF group entry when its update type is
// ARF_UPDATE; otherwise it is frames_till_gf_update_due while an alt ref is
// pending.
static int get_arf_src_index(AV1_COMP *cpi) {
  if (!is_altref_enabled(cpi)) return 0;

  if (cpi->oxcf.pass == 2) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    if (gf_group->update_type[gf_group->index] != ARF_UPDATE) return 0;
    return gf_group->arf_src_offset[gf_group->index];
  }

  RATE_CONTROL *const rc = &cpi->rc;
  return rc->source_alt_ref_pending ? rc->frames_till_gf_update_due : 0;
}
// Returns the lookahead offset of the source frame to be used as a backward
// reference frame (BRF), or 0 if the current GF group entry does not have
// bi-directional prediction enabled or is not a BRF update.
static int get_brf_src_index(AV1_COMP *cpi) {
  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;

  // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup
  //               flag.
  if (!gf_group->bidir_pred_enabled[gf_group->index]) return 0;

  if (cpi->oxcf.pass != 2) {
    // TODO(zoeliu): To re-visit the setup for this scenario
    return cpi->rc.bipred_group_interval - 1;
  }

  if (gf_group->update_type[gf_group->index] == BRF_UPDATE) {
    return gf_group->brf_src_offset[gf_group->index];
  }
  return 0;
}
// Returns the lookahead offset of the source frame to be used for an internal
// ARF (ARF2), or 0 if the current frame is not one. A non-zero value is only
// produced in pass 2, with alt refs enabled, extra ARFs configured and a GF
// group entry whose update type is INTNL_ARF_UPDATE.
static int get_arf2_src_index(AV1_COMP *cpi) {
  if (!(is_altref_enabled(cpi) && cpi->num_extra_arfs)) return 0;
  if (cpi->oxcf.pass != 2) return 0;

  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  if (gf_group->update_type[gf_group->index] != INTNL_ARF_UPDATE) return 0;
  return gf_group->arf_src_offset[gf_group->index];
}
// Sets the encoder up to code an ARF (arf2 == 0) or an ARF2 (arf2 == 1) from
// the lookahead entry arf_src_index frames ahead. Forward keyframes are
// handled here too.
// *temporal_filtered is written as 1 when the ARF was temporally filtered
// (so the caller can pick the post-filter buffer) and 0 otherwise.
// Returns the lookahead entry, or NULL if none is available at that offset.
// source_alt_ref_pending is cleared in either case.
static struct lookahead_entry *setup_arf_or_arf2(
    AV1_COMP *const cpi, const int arf_src_index, const int arf2,
    int *temporal_filtered, EncodeFrameParams *const frame_params) {
  RATE_CONTROL *const rc = &cpi->rc;

  assert(arf_src_index <= rc->frames_to_key);
  *temporal_filtered = 0;

  struct lookahead_entry *const entry =
      av1_lookahead_peek(cpi->lookahead, arf_src_index);
  if (entry == NULL) {
    rc->source_alt_ref_pending = 0;
    return entry;
  }

  AV1_COMMON *const cm = &cpi->common;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  cm->showable_frame = 1;
  cpi->alt_ref_source = entry;

  // When arf_src_index == rc->frames_to_key, it indicates a fwd_kf
  const int is_fwd_kf = !arf2 && arf_src_index == rc->frames_to_key;
  if (is_fwd_kf) {
    // Skip temporal filtering and mark as intra_only if we have a fwd_kf
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const int arf_slot = gf_group->arf_update_idx[gf_group->index];
    cpi->is_arf_filter_off[arf_slot] = 1;
    cpi->no_show_kf = 1;
  } else if (oxcf->arnr_max_frames > 0) {
    // Produce the filtered ARF frame.
    av1_temporal_filter(cpi, arf_src_index);
    aom_extend_frame_borders(&cpi->alt_ref_buffer, av1_num_planes(cm));
    *temporal_filtered = 1;
  }

  frame_params->show_frame = 0;

  if (oxcf->pass < 2) {
    // In second pass, the buffer updates configure will be set
    // in the function av1_rc_get_second_pass_params
    av1_configure_buffer_updates(cpi, arf2 ? INTNL_ARF_UPDATE : ARF_UPDATE);
  }

  rc->source_alt_ref_pending = 0;
  return entry;
}
// Determine whether there is a forced keyframe pending in the lookahead
// buffer.
// Scans lookahead entries 0..up_to_index (inclusive) and returns 1 as soon as
// one carries the AOM_EFLAG_FORCE_KF flag. Returns 0 if the end of the
// lookahead is reached first, or if none of the scanned entries is forced.
static int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
                                      const int up_to_index) {
  for (int i = 0; i <= up_to_index; i++) {
    const struct lookahead_entry *e = av1_lookahead_peek(lookahead, i);
    if (e == NULL) {
      // We have reached the end of the lookahead buffer and not early-returned
      // so there isn't a forced key-frame pending.
      return 0;
    }
    // flags is a bitmask: test the bit rather than comparing for equality, so
    // a forced keyframe is still detected when other flag bits are set too.
    if (e->flags & AOM_EFLAG_FORCE_KF) {
      return 1;
    }
  }
  // Every entry up to up_to_index exists and none of them is forced.
  return 0;
}
// Picks the source frame for the next encode and does the setup that goes
// with that choice.
// An ARF, then an ARF2, then a BRF source is tried, in that order. If none of
// them yields a frame, the next frame is popped from the lookahead and coded
// as a shown frame.
//   temporal_filtered: written as 1 if a temporally filtered ARF was produced.
//   flush:             set to 1 if an ARF/ARF2 was abandoned because a forced
//                      keyframe is pending; also passed on to the lookahead
//                      pop.
//   last_source:       written with the previous lookahead entry when a frame
//                      is popped and this is not the first frame.
// Returns the chosen source, or NULL if we couldn't find one.
struct lookahead_entry *choose_frame_source(
    AV1_COMP *const cpi, int *const temporal_filtered, int *const flush,
    struct lookahead_entry **last_source,
    EncodeFrameParams *const frame_params) {
  AV1_COMMON *const cm = &cpi->common;
  struct lookahead_entry *chosen = NULL;
  *temporal_filtered = 0;

  // Should we encode an alt-ref frame (arf2 == 0) or an arf2 frame
  // (arf2 == 1)? The two are mutually exclusive.
  for (int arf2 = 0; arf2 <= 1; ++arf2) {
    const int src_index =
        arf2 ? get_arf2_src_index(cpi) : get_arf_src_index(cpi);
    if (!src_index) continue;

    if (is_forced_keyframe_pending(cpi->lookahead, src_index)) {
      // A forced keyframe is pending: drop the ARF and ask for a flush.
      *flush = 1;
      continue;
    }

    chosen = setup_arf_or_arf2(cpi, src_index, arf2, temporal_filtered,
                               frame_params);
  }

  cpi->rc.is_bwd_ref_frame = 0;
  const int brf_src_index = get_brf_src_index(cpi);
  if (brf_src_index) {
    assert(brf_src_index <= cpi->rc.frames_to_key);
    // The result of this peek replaces any source chosen above, even if NULL.
    chosen = av1_lookahead_peek(cpi->lookahead, brf_src_index);
    if (chosen != NULL) {
      cm->showable_frame = 1;
      frame_params->show_frame = 0;

      if (cpi->oxcf.pass < 2) {
        // In second pass, the buffer updates configure will be set
        // in the function av1_rc_get_second_pass_params
        av1_configure_buffer_updates(cpi, BRF_UPDATE);
      }
    }
  }

  if (chosen) return chosen;

  // Get last frame source.
  if (cm->current_frame.frame_number > 0) {
    *last_source = av1_lookahead_peek(cpi->lookahead, -1);
  }

  // Read in the source frame.
  chosen = av1_lookahead_pop(cpi->lookahead, *flush);
  if (chosen != NULL) {
    frame_params->show_frame = 1;
    // Check to see if the frame should be encoded as an arf overlay.
    check_src_altref(cpi, chosen);
  }
  return chosen;
}
int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
uint8_t *const dest, unsigned int *frame_flags) {
EncodeFrameParams frame_params = { 0, 0, 0 };
EncodeFrameResults frame_results = { 0 };
uint8_t *const dest, unsigned int *frame_flags,
int64_t *const time_stamp, int64_t *const time_end,
const aom_rational_t *const timebase, int flush) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
EncodeFrameInput frame_input;
EncodeFrameParams frame_params;
EncodeFrameResults frame_results;
memset(&frame_input, 0, sizeof(frame_input));
memset(&frame_params, 0, sizeof(frame_params));
memset(&frame_results, 0, sizeof(frame_results));
int temporal_filtered = 0;
struct lookahead_entry *source = NULL;
struct lookahead_entry *last_source = NULL;
if (cm->show_existing_frame) {
source = av1_lookahead_pop(cpi->lookahead, flush);
} else {
source = choose_frame_source(cpi, &temporal_filtered, &flush, &last_source,
&frame_params);
}
if (source == NULL) { // If no source was found, we can't encode a frame.
if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
av1_end_first_pass(cpi); /* get last stats packet */
cpi->twopass.first_pass_done = 1;
}
return -1;
}
frame_input.source = temporal_filtered ? &cpi->alt_ref_buffer : &source->img;
frame_input.last_source = last_source != NULL ? &last_source->img : NULL;
frame_input.ts_duration = source->ts_end - source->ts_start;
*time_stamp = source->ts_start;
*time_end = source->ts_end;
if (source->ts_start < cpi->first_time_stamp_ever) {
cpi->first_time_stamp_ever = source->ts_start;
cpi->last_end_time_stamp_seen = source->ts_start;
}
av1_apply_encoding_flags(cpi, source->flags);
if (!cm->show_existing_frame)
*frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
cpi->frame_flags = *frame_flags;
if (frame_params.show_frame ||
(cm->show_existing_frame && cpi->rc.is_src_frame_alt_ref)) {
// Shown frames and arf-overlay frames need frame-rate considering
adjust_frame_rate(cpi, source);
}
if (cm->show_existing_frame) {
// show_existing_frame implies this frame is shown!
frame_params.show_frame = 1;
} else {
// Retain the RF_LEVEL for the current newly coded frame.
cm->cur_frame->frame_rf_level =
cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
if (cpi->film_grain_table) {
cm->seq_params.film_grain_params_present = aom_film_grain_table_lookup(
cpi->film_grain_table, *time_stamp, *time_end, 0 /* =erase */,
&cm->film_grain_params);
}
cm->cur_frame->film_grain_params_present =
cm->seq_params.film_grain_params_present;
// only one operating point supported now
const int64_t pts64 = ticks_to_timebase_units(timebase, *time_stamp);
if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR;
cpi->common.frame_presentation_time = (uint32_t)pts64;
}
if (oxcf->pass == 2 &&
(!cm->show_existing_frame || cpi->rc.is_src_frame_alt_ref)) {
// GF_GROUP needs updating for arf overlays as well as non-show-existing
av1_rc_get_second_pass_params(cpi, &frame_params);
}
if (cm->show_existing_frame && frame_params.frame_type != KEY_FRAME) {
// Force show-existing frames to be INTER, except forward keyframes
frame_params.frame_type = INTER_FRAME;
}
if (!cm->show_existing_frame) {
cm->using_qmatrix = cpi->oxcf.using_qm;
cm->min_qmlevel = cpi->oxcf.qm_minlevel;
cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
if (cpi->twopass.gf_group.index == 1 && cpi->oxcf.enable_tpl_model) {
av1_set_frame_size(cpi, cm->width, cm->height);
av1_tpl_setup_stats(cpi, &frame_input);
}
}
frame_params.frame_flags = frame_flags;
@@ -143,17 +837,82 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
// TODO(david.turner@argondesign.com): Change all the encode strategy to
// modify frame_params instead of cm or cpi.
// Apply external override flags
set_ext_overrides(cpi, &frame_params);
// Per-frame encode speed. In theory this can vary, but things may have been
// written assuming speed-level will not change within a sequence, so this
// parameter should be used with caution.
frame_params.speed = oxcf->speed;
// Work out which reference frame slots may be used.
frame_params.ref_frame_flags = get_ref_frame_flags(cpi);
// Work out some encoding parameters specific to the pass:
if (oxcf->pass == 0) {
if (cpi->oxcf.rc_mode == AOM_CBR) {
av1_rc_get_one_pass_cbr_params(cpi, &frame_params);
} else {
av1_rc_get_one_pass_vbr_params(cpi, &frame_params);
}
} else if (oxcf->pass == 1) {
av1_setup_frame_size(cpi);
cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(&cpi->oxcf);
if (!cpi->refresh_alt_ref_frame && (cm->current_frame.frame_number == 0 ||
(cpi->frame_flags & FRAMEFLAGS_KEY))) {
frame_params.frame_type = KEY_FRAME;
} else {
frame_params.frame_type = INTER_FRAME;
}
} else if (oxcf->pass == 2) {
#if CONFIG_MISMATCH_DEBUG
mismatch_move_frame_idx_w();
#endif
#if TXCOEFF_COST_TIMER
cm->txcoeff_cost_timer = 0;
cm->txcoeff_cost_count = 0;
#endif
}
if (av1_encode(cpi, dest, &frame_params, &frame_results) != AOM_CODEC_OK) {
if (oxcf->pass == 0 || oxcf->pass == 2) {
// Apply external override flags
set_ext_overrides(cpi, &frame_params);
// Work out which reference frame slots may be used.
frame_params.ref_frame_flags = get_ref_frame_flags(cpi);
}
if (oxcf->pass == 0 || oxcf->pass == 2) {
frame_params.primary_ref_frame =
choose_primary_ref_frame(cpi, &frame_params);
frame_params.order_offset = get_order_offset(cpi, &frame_params);
}
if (av1_encode(cpi, dest, &frame_input, &frame_params, &frame_results) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
if (oxcf->pass == 2) {
#if TXCOEFF_COST_TIMER
cm->cum_txcoeff_cost_timer += cm->txcoeff_cost_timer;
fprintf(stderr,
"\ntxb coeff cost block number: %ld, frame time: %ld, cum time %ld "
"in us\n",
cm->txcoeff_cost_count, cm->txcoeff_cost_timer,
cm->cum_txcoeff_cost_timer);
#endif
av1_twopass_postencode_update(cpi);
}
if (oxcf->pass == 0 || oxcf->pass == 2) {
update_fb_of_context_type(cpi, &frame_params, cpi->fb_of_context_type);
set_additional_frame_flags(cm, frame_params.frame_flags);
update_rc_counts(cpi);
check_show_existing_frame(cpi); // Is next frame a show_existing frame?
}
// Unpack frame_results:
*size = frame_results.size;
// Leave a signal for a higher level caller about if this frame is droppable
if (*size > 0) {
cpi->droppable = is_frame_droppable(cpi);
}
return AOM_CODEC_OK;
}
+14 -1
View File
@@ -16,11 +16,24 @@
extern "C" {
#endif
#include <stdint.h>
#include "aom/aom_encoder.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/firstpass.h"
// This function will implement high-level encode strategy, choosing frame type,
// frame placement, etc. It populates an EncodeFrameParams struct with the
// results of these decisions and then calls av1_encode()
int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
uint8_t *const dest, unsigned int *frame_flags);
uint8_t *const dest, unsigned int *frame_flags,
int64_t *const time_stamp, int64_t *const time_end,
const aom_rational_t *const timebase, int flush);
// Set individual buffer update flags based on frame reference type
void av1_configure_buffer_updates(AV1_COMP *const cpi,
const FRAME_UPDATE_TYPE type);
#ifdef __cplusplus
} // extern "C"
+118 -109
View File
@@ -600,7 +600,7 @@ static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
return;
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
x->source_variance = av1_high_get_sby_perpixel_variance(
cpi, &x->plane[0].src, bsize, xd->bd);
} else {
@@ -613,8 +613,7 @@ static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
x->edge_strength = UINT16_MAX;
} else {
x->edge_strength =
edge_strength(&x->plane[0].src, bsize,
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd);
edge_strength(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
}
// Save rdmult before it might be changed, so it can be restored later.
orig_rdmult = x->rdmult;
@@ -2180,7 +2179,8 @@ static void simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
// Get a copy of the prediction output
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
AOM_PLANE_Y, AOM_PLANE_Y);
aom_clear_system_state();
@@ -2787,77 +2787,6 @@ static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
}
}
#define FEATURE_SIZE 19
static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f,
0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f,
0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f,
};
static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f,
-1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f,
-0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f,
0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f,
};
static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f,
-0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f,
-1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f,
0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f,
};
static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f,
-0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f,
-1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f,
-0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f,
};
static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f,
-0.658074f, -1.960362f, 0.000000f, -0.403080f, 0.282699f,
-2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f,
0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f,
};
static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
-1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f,
-0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f,
3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f,
-0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f,
};
static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
-1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f,
-4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f,
1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f,
-0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f,
};
static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
-2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f,
-2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f,
0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f,
-0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f,
};
static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
-1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f,
-3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f,
0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f,
-0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f,
};
static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
-1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f,
-1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f,
0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f,
0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
};
// split_score indicates confidence of picking split partition;
// none_score indicates confidence of picking none partition;
static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
@@ -2980,7 +2909,7 @@ static void ml_prune_rect_partition(const AV1_COMP *const cpi,
// Variance ratios
const MACROBLOCKD *const xd = &x->e_mbd;
int whole_block_variance;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
whole_block_variance = av1_high_get_sby_perpixel_variance(
cpi, &x->plane[0].src, bsize, xd->bd);
} else {
@@ -2998,7 +2927,7 @@ static void ml_prune_rect_partition(const AV1_COMP *const cpi,
const int x_idx = (i & 1) * bw / 2;
const int y_idx = (i >> 1) * bw / 2;
buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
split_variance[i] =
av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
} else {
@@ -3180,7 +3109,7 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
src + i * block_size_high[horz_4_bs] * src_stride;
const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
unsigned int horz_var, vert_var, sse;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
switch (xd->bd) {
case 10:
horz_var = cpi->fn_ptr[horz_4_bs].vf(
@@ -3898,6 +3827,13 @@ static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
(void)*tp_orig;
#if CONFIG_COLLECT_PARTITION_STATS
PartitionStats *part_stats = &cpi->partition_stats;
const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
int *partition_decisions = part_stats->partition_decisions[bsize_idx];
int *partition_attempts = part_stats->partition_attempts[bsize_idx];
#endif
// Override partition costs at the edges of the frame in the same
// way as in read_partition (see decodeframe.c)
if (!(has_rows && has_cols)) {
@@ -4154,6 +4090,11 @@ BEGIN_PARTITION_SEARCH:
const int64_t best_remain_rdcost =
(best_rdc.rdcost == INT64_MAX) ? INT64_MAX
: (best_rdc.rdcost - partition_rd_cost);
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
partition_attempts[PARTITION_NONE] += 1;
}
#endif
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
pb_source_variance = x->source_variance;
@@ -4291,6 +4232,11 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
int idx;
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_SPLIT] += 1;
}
#endif
for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
const int x_idx = (idx & 1) * mi_step;
const int y_idx = (idx >> 1) * mi_step;
@@ -4469,11 +4415,16 @@ BEGIN_PARTITION_SEARCH:
pc_tree->horizontal[0].pred_interp_filter =
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
}
sum_rdc.rate = partition_cost[PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
? INT64_MAX
: (best_rdc.rdcost - sum_rdc.rdcost);
sum_rdc.rate = partition_cost[PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
partition_attempts[PARTITION_HORZ] += 1;
}
#endif
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
best_remain_rdcost);
@@ -4551,6 +4502,11 @@ BEGIN_PARTITION_SEARCH:
const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
? INT64_MAX
: (best_rdc.rdcost - sum_rdc.rdcost);
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
partition_attempts[PARTITION_VERT] += 1;
}
#endif
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
PARTITION_VERT, subsize, &pc_tree->vertical[0],
best_remain_rdcost);
@@ -4609,7 +4565,7 @@ BEGIN_PARTITION_SEARCH:
if (pb_source_variance == UINT_MAX) {
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
pb_source_variance = av1_high_get_sby_perpixel_variance(
cpi, &x->plane[0].src, bsize, xd->bd);
} else {
@@ -4770,6 +4726,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->horizontala[2].ref_selected[0] = split_mbmi[2]->ref_frame[0];
}
}
#if CONFIG_COLLECT_PARTITION_STATS
{
RD_STATS tmp_sum_rdc;
av1_init_rd_stats(&tmp_sum_rdc);
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
if (!frame_is_intra_only(cm) &&
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_HORZ_A] += 1;
}
}
#endif
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
@@ -4829,6 +4797,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->horizontalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
}
}
#if CONFIG_COLLECT_PARTITION_STATS
{
RD_STATS tmp_sum_rdc;
av1_init_rd_stats(&tmp_sum_rdc);
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
if (!frame_is_intra_only(cm) &&
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_HORZ_B] += 1;
}
}
#endif
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_B, mi_row, mi_col, subsize,
@@ -4886,6 +4866,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->verticala[2].ref_selected[0] = split_mbmi[1]->ref_frame[0];
}
}
#if CONFIG_COLLECT_PARTITION_STATS
{
RD_STATS tmp_sum_rdc;
av1_init_rd_stats(&tmp_sum_rdc);
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
if (!frame_is_intra_only(cm) &&
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_VERT_A] += 1;
}
}
#endif
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_A, mi_row, mi_col, bsize2,
@@ -4942,6 +4934,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->verticalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
}
}
#if CONFIG_COLLECT_PARTITION_STATS
{
RD_STATS tmp_sum_rdc;
av1_init_rd_stats(&tmp_sum_rdc);
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
if (!frame_is_intra_only(cm) &&
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_VERT_B] += 1;
}
}
#endif
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
@@ -5000,6 +5004,11 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_HORZ_4] += 1;
}
#endif
for (int i = 0; i < 4; ++i) {
const int this_mi_row = mi_row + i * quarter_step;
@@ -5046,6 +5055,11 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.rate = partition_cost[PARTITION_VERT_4];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
partition_attempts[PARTITION_VERT_4] += 1;
}
#endif
for (int i = 0; i < 4; ++i) {
const int this_mi_col = mi_col + i * quarter_step;
@@ -5083,6 +5097,11 @@ BEGIN_PARTITION_SEARCH:
// Did not find a valid partition, go back and search again, with less
// constraint on which partition types to search.
x->must_find_valid_partition = 1;
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm)) {
part_stats->partition_redo += 1;
}
#endif
goto BEGIN_PARTITION_SEARCH;
}
@@ -5093,6 +5112,13 @@ BEGIN_PARTITION_SEARCH:
(void)best_rd;
*rd_cost = best_rdc;
#if CONFIG_COLLECT_PARTITION_STATS
if (!frame_is_intra_only(cm) && best_rdc.rate < INT_MAX &&
best_rdc.dist < INT64_MAX) {
partition_decisions[pc_tree->partitioning] += 1;
}
#endif
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
if (bsize == cm->seq_params.sb_size) {
@@ -5643,13 +5669,11 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
sb_size, BLOCK_4X4, &dummy_rdc, INT64_MAX, pc_root,
NULL);
}
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
// TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 &&
cm->tile_rows == 1) {
av1_inter_mode_data_fit(tile_data, x->rdmult);
}
#endif
if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
(tile_info->mi_row_end > (mi_row + mib_size))) {
if (sb_cols_in_tile == 1)
@@ -5805,9 +5829,7 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
const TileInfo *const tile_info = &this_tile->tile_info;
int mi_row;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
av1_inter_mode_data_init(this_tile);
#endif
av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
tile_info->mi_col_end, tile_row);
@@ -6350,11 +6372,10 @@ static void encode_frame_internal(AV1_COMP *cpi) {
do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
!(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
TransformationType model;
const int64_t ref_frame_error =
av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
cpi->source->y_buffer, cpi->source->y_width,
cpi->source->y_height, cpi->source->y_stride);
const int64_t ref_frame_error = av1_frame_error(
is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
ref_buf[frame]->y_stride, cpi->source->y_buffer,
cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride);
if (ref_frame_error == 0) continue;
@@ -6380,9 +6401,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
if (tmp_wm_params.wmtype != IDENTITY) {
const int64_t warp_error = av1_refine_integerized_param(
&tmp_wm_params, tmp_wm_params.wmtype,
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
&tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd),
xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
cpi->source->y_buffer, cpi->source->y_width,
cpi->source->y_height, cpi->source->y_stride, 5,
@@ -6491,20 +6511,6 @@ void av1_encode_frame(AV1_COMP *cpi) {
// rather than the potential full set of 16 transforms
cm->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;
if (cm->show_frame == 0) {
int arf_offset = AOMMIN(
(MAX_GF_INTERVAL - 1),
cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
int brf_offset =
cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
current_frame->order_hint = current_frame->frame_number + arf_offset;
} else {
current_frame->order_hint = current_frame->frame_number;
}
current_frame->order_hint %=
(1 << (cm->seq_params.order_hint_info.order_hint_bits_minus_1 + 1));
// Make sure segment_id is no larger than last_active_segid.
if (cm->seg.enabled && cm->seg.update_map) {
const int mi_rows = cm->mi_rows;
@@ -6520,7 +6526,9 @@ void av1_encode_frame(AV1_COMP *cpi) {
}
av1_setup_frame_buf_refs(cm);
if (cpi->sf.selective_ref_frame >= 3) enforce_max_ref_frames(cpi);
if (cpi->sf.selective_ref_frame >= 3 && cpi->oxcf.max_reference_frames == 7) {
enforce_max_ref_frames(cpi);
}
av1_setup_frame_sign_bias(cm);
#if CONFIG_MISMATCH_DEBUG
@@ -6830,7 +6838,8 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
xd->block_ref_scale_factors[ref], num_planes);
}
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
av1_num_planes(cm) - 1);
if (mbmi->motion_mode == OBMC_CAUSAL) {
assert(cpi->oxcf.enable_obmc == 1);
av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
+5 -4
View File
@@ -43,7 +43,7 @@ static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
const uint8_t *src8, ptrdiff_t src_stride,
const uint8_t *pred8, ptrdiff_t pred_stride) {
if (check_subtract_block_size(rows, cols)) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
src_stride, pred8, pred_stride, xd->bd);
return;
@@ -54,7 +54,7 @@ static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
return;
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
pred8, pred_stride, xd->bd);
return;
@@ -163,6 +163,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
qparam.tx_size = tx_size;
qparam.qmatrix = qmatrix;
qparam.iqmatrix = iqmatrix;
qparam.use_quant_b_adapt = cm->use_quant_b_adapt;
TxfmParam txfm_param;
txfm_param.tx_type = tx_type;
txfm_param.tx_size = tx_size;
@@ -171,7 +172,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
txfm_param.bd = xd->bd;
txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
txfm_param.is_hbd = is_cur_buf_hbd(xd);
av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
@@ -431,7 +432,7 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
if (p->eobs[block] > 0) {
txfm_param.bd = xd->bd;
txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
txfm_param.is_hbd = is_cur_buf_hbd(xd);
txfm_param.tx_type = DCT_DCT;
txfm_param.tx_size = tx_size;
txfm_param.eob = p->eobs[block];
+81 -1290
View File
File diff suppressed because it is too large Load Diff
+106 -14
View File
@@ -329,6 +329,7 @@ typedef struct AV1EncoderConfig {
int enable_order_hint;
int enable_dist_wtd_comp;
int enable_ref_frame_mvs;
unsigned int max_reference_frames;
unsigned int allow_ref_frame_mvs;
int enable_masked_comp;
int enable_interintra_comp;
@@ -357,6 +358,9 @@ typedef struct AV1EncoderConfig {
unsigned int chroma_subsampling_x;
unsigned int chroma_subsampling_y;
int reduced_tx_type_set;
int use_intra_dct_only;
int use_inter_dct_only;
int quant_b_adapt;
int border_in_pixels;
} AV1EncoderConfig;
@@ -450,7 +454,6 @@ typedef struct FRAME_COUNTS {
[SWITCHABLE_FILTERS];
} FRAME_COUNTS;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
#define INTER_MODE_RD_DATA_OVERALL_SIZE 6400
typedef struct {
@@ -485,7 +488,6 @@ typedef struct inter_modes_info {
int64_t est_rd_arr[MAX_INTER_MODES];
RdIdxPair rd_idx_pair_arr[MAX_INTER_MODES];
} InterModesInfo;
#endif
// Encoder row synchronization
typedef struct AV1RowMTSyncData {
@@ -514,9 +516,7 @@ typedef struct TileDataEnc {
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
FRAME_CONTEXT *row_ctx;
uint8_t allow_update_cdf;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
#endif
AV1RowMTSync row_mt_sync;
AV1RowMTInfo row_mt_info;
} TileDataEnc;
@@ -551,9 +551,7 @@ typedef struct ThreadData {
tran_low_t *tree_coeff_buf[MAX_MB_PLANE];
tran_low_t *tree_qcoeff_buf[MAX_MB_PLANE];
tran_low_t *tree_dqcoeff_buf[MAX_MB_PLANE];
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
InterModesInfo *inter_modes_info;
#endif
uint32_t *hash_value_buffer[2][2];
int32_t *wsrc_buf;
int32_t *mask_buf;
@@ -595,6 +593,15 @@ typedef struct {
YV12_BUFFER_CONFIG buf;
} EncRefCntBuffer;
#if CONFIG_COLLECT_PARTITION_STATS
// Counters collected when CONFIG_COLLECT_PARTITION_STATS is enabled.
// The first dimension of both tables (6) is a block-size row index as
// returned by av1_get_bsize_idx_for_part_stats(); av1_print_partition_stats()
// labels those rows 128, 64, 32, 16, 8 and 4. The second dimension is indexed
// by partition type (EXT_PARTITION_TYPES entries).
typedef struct PartitionStats {
// Per block-size row / partition type counters, written out as the
// "decision_<type>" columns. Presumably the number of times each partition
// type was finally chosen -- confirm against the code that increments them.
int partition_decisions[6][EXT_PARTITION_TYPES];
// Per block-size row / partition type counters, written out as the
// "attempt_<type>" columns. Presumably the number of times each partition
// type was evaluated -- confirm against the code that increments them.
int partition_attempts[6][EXT_PARTITION_TYPES];
// Single global counter, written out in the "redo" column of every row.
int partition_redo;
} PartitionStats;
#endif
typedef struct AV1_COMP {
QUANTS quants;
ThreadData td;
@@ -658,14 +665,12 @@ typedef struct AV1_COMP {
// frame of the same type as the current frame).
int fb_of_context_type[REF_FRAMES];
#if USE_SYMM_MULTI_LAYER
// When true, a new rule for backward (future) reference frames is in effect:
// - BWDREF_FRAME is always the closest future frame available
// - ALTREF2_FRAME is always the 2nd closest future frame available
// - 'refresh_bwd_ref_frame' flag is used for updating both the BWDREF_FRAME
// and ALTREF2_FRAME. ('refresh_alt2_ref_frame' flag is irrelevant).
int new_bwdref_update_rule;
#endif
int ext_refresh_frame_flags_pending;
int ext_refresh_last_frame;
@@ -718,6 +723,9 @@ typedef struct AV1_COMP {
int ref_frame_flags;
int ext_ref_frame_flags;
// speed is passed as a per-frame parameter into the encoder
int speed;
// sf contains fine-grained config set internally based on speed
SPEED_FEATURES sf;
unsigned int max_mv_magnitude;
@@ -865,18 +873,35 @@ typedef struct AV1_COMP {
#endif
// Set if screen content is set or relevant tools are enabled
int is_screen_content_type;
#if CONFIG_COLLECT_PARTITION_STATS
PartitionStats partition_stats;
#endif
} AV1_COMP;
// Bundle of per-frame inputs handed to av1_encode() through its
// 'frame_input' argument.
typedef struct {
// Frame buffer to be encoded.
// NOTE(review): presumably the current source picture -- confirm at caller.
YV12_BUFFER_CONFIG *source;
// NOTE(review): presumably the previous source picture (may be NULL for the
// first frame) -- confirm at caller.
YV12_BUFFER_CONFIG *last_source;
// Duration associated with 'source'.
// NOTE(review): units look like timestamp ticks -- confirm at caller.
int64_t ts_duration;
} EncodeFrameInput;
// EncodeFrameParams contains per-frame encoding parameters decided upon by
// av1_encode_strategy() and passed down to av1_encode()
typedef struct {
struct EncodeFrameParams {
int error_resilient_mode;
FRAME_TYPE frame_type;
int primary_ref_frame;
int order_offset;
int show_frame;
// This is a bitmask of which reference slots can be used in this frame
int ref_frame_flags;
// Speed level to use for this frame: Bigger number means faster.
int speed;
unsigned int *frame_flags;
} EncodeFrameParams;
};
typedef struct EncodeFrameParams EncodeFrameParams;
// EncodeFrameResults contains information about the result of encoding a
// single frame
@@ -905,6 +930,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
const aom_rational_t *timebase);
int av1_encode(AV1_COMP *const cpi, uint8_t *const dest,
const EncodeFrameInput *const frame_input,
const EncodeFrameParams *const frame_params,
EncodeFrameResults *const frame_results);
@@ -918,12 +944,12 @@ aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags);
void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags);
int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
void av1_set_frame_size(AV1_COMP *cpi, int width, int height);
int av1_update_entropy(AV1_COMP *cpi, int update);
int av1_set_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
@@ -937,8 +963,19 @@ int av1_get_quantizer(struct AV1_COMP *cpi);
int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n);
int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n);
// av1 uses 10,000,000 ticks/second as time stamp
#define TICKS_PER_SEC 10000000LL
// Converts 'n' timebase units into ticks, i.e. computes
// n * TICKS_PER_SEC * timebase->num / timebase->den in 64-bit integer
// arithmetic, evaluated left to right with a truncating final division.
static INLINE int64_t timebase_units_to_ticks(const aom_rational_t *timebase,
                                              int64_t n) {
  const int64_t scaled_units = n * TICKS_PER_SEC * timebase->num;
  return scaled_units / timebase->den;
}
// Converts 'n' ticks back into timebase units. A bias of
// (TICKS_PER_SEC * timebase->num / 2 - 1) is added to n * timebase->den
// before the two truncating divisions so that the result is rounded rather
// than simply truncated.
static INLINE int64_t ticks_to_timebase_units(const aom_rational_t *timebase,
                                              int64_t n) {
  const int64_t rounding_bias = TICKS_PER_SEC * timebase->num / 2 - 1;
  const int64_t biased_ticks = n * timebase->den + rounding_bias;
  return biased_ticks / timebase->num / TICKS_PER_SEC;
}
static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
@@ -975,6 +1012,13 @@ static INLINE int enc_is_ref_frame_buf(const AV1_COMMON *const cm,
return (ref_frame <= ALTREF_FRAME);
}
// Prepares 'buf' for the current frame of 'cm': asserts that 'buf' is not
// NULL, calls ensure_mv_buffer() on it, and then stamps the buffer with the
// frame dimensions currently stored in 'cm'.
static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, RefCntBuffer *buf) {
assert(buf != NULL);
// NOTE(review): ensure_mv_buffer() is defined elsewhere; presumably it
// (re)allocates the motion-vector storage of 'buf' to fit 'cm' -- confirm.
ensure_mv_buffer(buf, cm);
// Record the frame size this buffer now corresponds to.
buf->width = cm->width;
buf->height = cm->height;
}
// Token buffer is only used for palette tokens.
static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
int sb_size_log2,
@@ -1046,6 +1090,8 @@ static INLINE int *cond_cost_list(const struct AV1_COMP *cpi, int *cost_list) {
void av1_new_framerate(AV1_COMP *cpi, double framerate);
void av1_setup_frame_size(AV1_COMP *cpi);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
// Returns 1 if a frame is scaled and 0 otherwise.
@@ -1077,6 +1123,52 @@ static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
// field.
aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
#if CONFIG_COLLECT_PARTITION_STATS
// Writes the contents of 'part_stats' to "partition_stats.csv" (opened with
// mode "w", so an existing file is replaced). The file holds one header line
// followed by six data lines, one per block-size row (128 down to 4). Each
// data line carries the block size, the global redo counter, then the
// per-partition-type decision counters and attempt counters. Returns without
// doing anything if the file cannot be opened.
static INLINE void av1_print_partition_stats(PartitionStats *part_stats) {
  const int block_dims[6] = { 128, 64, 32, 16, 8, 4 };

  FILE *csv = fopen("partition_stats.csv", "w");
  if (csv == NULL) return;

  // Header: two fixed columns, then one column per partition type for each
  // of the two counter tables.
  fprintf(csv, "bsize,redo,");
  for (int type = 0; type < EXT_PARTITION_TYPES; ++type) {
    fprintf(csv, "decision_%d,", type);
  }
  for (int type = 0; type < EXT_PARTITION_TYPES; ++type) {
    fprintf(csv, "attempt_%d,", type);
  }
  fprintf(csv, "\n");

  // One data line per block-size row.
  for (int row = 0; row < 6; ++row) {
    const int *const decisions = part_stats->partition_decisions[row];
    const int *const attempts = part_stats->partition_attempts[row];

    fprintf(csv, "%d,%d,", block_dims[row], part_stats->partition_redo);
    for (int type = 0; type < EXT_PARTITION_TYPES; ++type) {
      fprintf(csv, "%d,", decisions[type]);
    }
    for (int type = 0; type < EXT_PARTITION_TYPES; ++type) {
      fprintf(csv, "%d,", attempts[type]);
    }
    fprintf(csv, "\n");
  }

  fclose(csv);
}
// Maps a square block size to its row index in the PartitionStats tables:
// 128x128 -> 0, 64x64 -> 1, 32x32 -> 2, 16x16 -> 3, 8x8 -> 4, 4x4 -> 5.
// Any other block size trips an assertion in debug builds; when assertions
// are compiled out, -1 is returned for such sizes.
static INLINE int av1_get_bsize_idx_for_part_stats(BLOCK_SIZE bsize) {
  // BLOCK_4X4 has its own row (index 5) in the switch below and in the
  // six-row stats tables, so the precondition has to accept it as well;
  // otherwise debug builds would abort on a size that is actually supported.
  assert(bsize == BLOCK_128X128 || bsize == BLOCK_64X64 ||
         bsize == BLOCK_32X32 || bsize == BLOCK_16X16 ||
         bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
  switch (bsize) {
    case BLOCK_128X128: return 0;
    case BLOCK_64X64: return 1;
    case BLOCK_32X32: return 2;
    case BLOCK_16X16: return 3;
    case BLOCK_8X8: return 4;
    case BLOCK_4X4: return 5;
    default: assert(0 && "Invalid bsize for partition_stats."); return -1;
  }
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
+94 -34
View File
@@ -284,6 +284,17 @@ static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
return av1_cost_literal(1);
}
// Precomputed Golomb cost for remainders r in [0, 31], where
// get_br_cost_with_diff() uses r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS.
// Entry 0 is 0; for r >= 1 the entry equals 512 * (2 * floor(log2(r)) + 1).
// NOTE(review): 512 is presumably the cost of one literal bit in the
// encoder's rate units (cf. av1_cost_literal(1)) -- confirm.
static const int golomb_bits_cost[32] = {
0, 512, 512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5,
512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7,
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9,
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9
};
// Per-step increase of golomb_bits_cost: for r >= 1, entry r equals
// golomb_bits_cost[r] - golomb_bits_cost[r - 1]. It is therefore non-zero
// only at r == 1 (512) and at the powers of two 2, 4, 8 and 16 (512 * 2),
// which matches the (r & (r - 1)) == 0 ? 1024 : 0 rule that
// get_br_cost_with_diff() applies for r >= 32.
static const int golomb_cost_diff[32] = {
0, 512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0,
512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static INLINE int get_golomb_cost(int abs_qc) {
if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
@@ -293,6 +304,27 @@ static INLINE int get_golomb_cost(int abs_qc) {
return 0;
}
// Returns the base-range cost of 'level' (coeff_lps[base_range] plus any
// Golomb cost) and accumulates into '*diff' the amounts looked up for this
// level:
//  - while level does not exceed the Golomb threshold, the entry stored in
//    the second half of 'coeff_lps' (offset COEFF_BASE_RANGE + 1);
//  - once level reaches the threshold, the Golomb step taken from
//    golomb_cost_diff[] for r < 32, or 1024 when r is a power of two
//    for larger r.
// '*diff' is only ever added to; the caller is expected to initialise it.
static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
                                        int *diff) {
  const int golomb_threshold = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
  const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
  int golomb_cost = 0;

  if (level <= golomb_threshold) {
    *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
  }

  if (level >= golomb_threshold) {
    const int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    if (r >= 32) {
      // Outside the lookup tables: compute the cost directly; the cost only
      // steps up when r is a power of two.
      golomb_cost = get_golomb_cost(level);
      if ((r & (r - 1)) == 0) {
        *diff += 1024;
      } else {
        *diff += 0;
      }
    } else {
      golomb_cost = golomb_bits_cost[r];
      *diff += golomb_cost_diff[r];
    }
  }

  return coeff_lps[base_range] + golomb_cost;
}
static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
return coeff_lps[base_range] + get_golomb_cost(level);
@@ -732,7 +764,8 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb(
av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost;
const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
coeff_costs->lps_cost;
int c = eob - 1;
{
const int pos = scan[c];
@@ -758,7 +791,7 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb(
}
}
}
const int(*base_cost)[4] = coeff_costs->base_cost;
const int(*base_cost)[8] = coeff_costs->base_cost;
for (c = eob - 2; c >= 1; --c) {
const int pos = scan[c];
const int coeff_ctx = coeff_contexts[pos];
@@ -1262,21 +1295,28 @@ static int hbt_create_hashes(TxbInfo *txb_info,
txb_eob_costs, p, block, fast_mode, rate_cost);
}
static AOM_FORCE_INLINE int get_coeff_cost_simple(
static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
int ci, tran_low_t abs_qc, int coeff_ctx,
const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
const uint8_t *levels) {
const uint8_t *levels, int *cost_low) {
// this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
// and not the last (scan_idx != eob - 1)
assert(ci > 0);
int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
int diff = 0;
if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
if (abs_qc) {
cost += av1_cost_literal(1);
if (abs_qc > NUM_BASE_LEVELS) {
const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
int brcost_diff = 0;
cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
&brcost_diff);
diff += brcost_diff;
}
}
*cost_low = cost - diff;
return cost;
}
@@ -1369,13 +1409,23 @@ static INLINE void update_coeff_general(
const int64_t rd = RDCOST(rdmult, rate, dist);
tran_low_t qc_low, dqc_low;
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
const tran_low_t abs_qc_low = abs_qc - 1;
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
const int rate_low =
get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
tran_low_t abs_qc_low;
int64_t dist_low, rd_low;
int rate_low;
if (abs_qc == 1) {
abs_qc_low = qc_low = dqc_low = 0;
dist_low = dist0;
rate_low = txb_costs->base_cost[coeff_ctx][0];
} else {
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
abs_qc_low = abs_qc - 1;
dist_low = get_coeff_dist(tqc, dqc_low, shift);
rate_low =
get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
}
rd_low = RDCOST(rdmult, rate_low, dist_low);
if (rd_low < rd) {
qcoeff[ci] = qc_low;
dqcoeff[ci] = dqc_low;
@@ -1409,28 +1459,28 @@ static AOM_FORCE_INLINE void update_coeff_simple(
*accu_rate += txb_costs->base_cost[coeff_ctx][0];
} else {
const tran_low_t abs_qc = abs(qc);
const tran_low_t tqc = tcoeff[ci];
const tran_low_t dqc = dqcoeff[ci];
const int rate = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs,
bwl, tx_class, levels);
if (abs(dqc) < abs(tqc)) {
const tran_low_t abs_tqc = abs(tcoeff[ci]);
const tran_low_t abs_dqc = abs(dqcoeff[ci]);
int rate_low = 0;
const int rate = get_two_coeff_cost_simple(
ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
if (abs_dqc < abs_tqc) {
*accu_rate += rate;
return;
}
const int64_t dist = get_coeff_dist(tqc, dqc, shift);
const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
const int64_t rd = RDCOST(rdmult, rate, dist);
const int sign = (qc < 0) ? 1 : 0;
tran_low_t qc_low, dqc_low;
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
const tran_low_t abs_qc_low = abs_qc - 1;
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
const int rate_low = get_coeff_cost_simple(
ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels);
const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
if (rd_low < rd) {
qcoeff[ci] = qc_low;
dqcoeff[ci] = dqc_low;
const int sign = (qc < 0) ? 1 : 0;
qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
*accu_rate += rate_low;
} else {
@@ -1468,14 +1518,24 @@ static AOM_FORCE_INLINE void update_coeff_eob(
int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
tran_low_t qc_low, dqc_low;
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
const tran_low_t abs_qc_low = abs_qc - 1;
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
const int rate_low =
get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx,
txb_costs, bwl, tx_class, levels);
const int64_t rd_low =
RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
tran_low_t abs_qc_low;
int64_t dist_low, rd_low;
int rate_low;
if (abs_qc == 1) {
abs_qc_low = 0;
dqc_low = qc_low = 0;
dist_low = 0;
rate_low = txb_costs->base_cost[coeff_ctx][0];
rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
} else {
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
abs_qc_low = abs_qc - 1;
dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
rate_low =
get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
}
int lower_level_new_eob = 0;
const int new_eob = si + 1;
-6
View File
@@ -421,11 +421,9 @@ static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
(int32_t *)aom_memalign(
16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
(InterModesInfo *)aom_malloc(
sizeof(*thread_data->td->inter_modes_info)));
#endif
for (int x = 0; x < 2; x++)
for (int y = 0; y < 2; y++)
@@ -544,9 +542,7 @@ static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
thread_data->td->mb.inter_modes_info = thread_data->td->inter_modes_info;
#endif
for (int x = 0; x < 2; x++) {
for (int y = 0; y < 2; y++) {
memcpy(thread_data->td->hash_value_buffer[x][y],
@@ -662,9 +658,7 @@ void av1_encode_tiles_row_mt(AV1_COMP *cpi) {
this_tile->row_mt_info.current_mi_row = this_tile->tile_info.mi_row_start;
this_tile->row_mt_info.num_threads_working = 0;
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
av1_inter_mode_data_init(this_tile);
#endif
av1_zero_above_context(cm, &cpi->td.mb.e_mbd,
this_tile->tile_info.mi_col_start,
this_tile->tile_info.mi_col_end, tile_row);
+50 -353
View File
@@ -36,6 +36,7 @@
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encode_strategy.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/mcomp.h"
@@ -380,7 +381,7 @@ static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
// Override the default variance function to use MSE.
v_fn_ptr.vf = get_block_variance_fn(bsize);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
}
@@ -449,18 +450,6 @@ static int find_fp_qindex(aom_bit_depth_t bit_depth) {
return i;
}
static void set_first_pass_params(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
if (!cpi->refresh_alt_ref_frame && (cm->current_frame.frame_number == 0 ||
(cpi->frame_flags & FRAMEFLAGS_KEY))) {
cm->current_frame.frame_type = KEY_FRAME;
} else {
cm->current_frame.frame_type = INTER_FRAME;
}
// Do not use periodic key frames.
cpi->rc.frames_to_key = INT_MAX;
}
static double raw_motion_error_stdev(int *raw_motion_err_list,
int raw_motion_err_counts) {
int64_t sum_raw_err = 0;
@@ -486,7 +475,7 @@ static double raw_motion_error_stdev(int *raw_motion_err_list,
#define UL_INTRA_THRESH 50
#define INVALID_ROW -1
void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->td.mb;
AV1_COMMON *const cm = &cpi->common;
@@ -558,7 +547,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
brightness_factor = 0.0;
neutral_count = 0.0;
set_first_pass_params(cpi);
// Do not use periodic key frames.
cpi->rc.frames_to_key = INT_MAX;
av1_set_quantizer(cm, qindex);
av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x,
@@ -701,14 +692,15 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
// Accumulate the intra error.
intra_error += (int64_t)this_error;
int stride = x->plane[0].src.stride;
const int hbd = is_cur_buf_hbd(xd);
const int stride = x->plane[0].src.stride;
uint8_t *buf = x->plane[0].src.buf;
for (int r8 = 0; r8 < 2; ++r8)
for (int r8 = 0; r8 < 2; ++r8) {
for (int c8 = 0; c8 < 2; ++c8) {
int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
}
}
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -730,7 +722,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
} else {
@@ -745,7 +737,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
cpi->unscaled_last_source->y_buffer + recon_yoffset;
unscaled_last_source_buf_2d.stride =
cpi->unscaled_last_source->y_stride;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
raw_motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
} else {
@@ -777,7 +769,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
int gf_motion_error;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
gf_motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
} else {
@@ -854,8 +846,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
xd->mi[0]->tx_size = TX_4X4;
xd->mi[0]->ref_frame[0] = LAST_FRAME;
xd->mi[0]->ref_frame[1] = NONE_FRAME;
av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale,
mb_col * mb_scale, NULL, bsize);
av1_enc_build_inter_predictor(cm, xd, mb_row * mb_scale,
mb_col * mb_scale, NULL, bsize,
AOM_PLANE_Y, AOM_PLANE_Y);
av1_encode_sby_pass1(cm, x, bsize);
sum_mvr += mv.row;
sum_mvr_abs += abs(mv.row);
@@ -1038,7 +1031,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
// TODO(paulwilkins): Handle the case when duration is set to 0, or
// something less than the full time between subsequent values of
// cpi->source_time_stamp.
fps.duration = (double)(source->ts_end - source->ts_start);
fps.duration = (double)ts_duration;
// Don't want to do output stats with a stack variable!
twopass->this_frame_stats = fps;
@@ -1566,7 +1559,6 @@ static int calculate_boost_bits(int frame_count, int boost,
0);
}
#if USE_SYMM_MULTI_LAYER
// #define CHCEK_GF_PARAMETER
#ifdef CHCEK_GF_PARAMETER
void check_frame_params(GF_GROUP *const gf_group, int gf_interval,
@@ -1693,7 +1685,6 @@ static int construct_multi_layer_gf_structure(GF_GROUP *const gf_group,
// Given the maximum allowed height of the pyramid structure, return the maximum
// GF length supported by the same.
static INLINE int get_max_gf_length(int max_pyr_height) {
#if CONFIG_FIX_GF_LENGTH
// We allow a frame to have at most two left/right descendants before changing
// them into a subtree, i.e., we allow the following structure:
/* OUT_OF_ORDER_FRAME
@@ -1710,9 +1701,6 @@ static INLINE int get_max_gf_length(int max_pyr_height) {
return MAX_GF_INTERVAL; // Special case: uses the old pyramid structure.
default: assert(0 && "Invalid max_pyr_height"); return -1;
}
#else
return 16;
#endif // CONFIG_FIX_GF_LENGTH
}
// Given the maximum allowed height of the pyramid structure, return the fixed
@@ -1722,11 +1710,12 @@ int av1_rc_get_fixed_gf_length(int max_pyr_height) {
return AOMMIN(max_gf_length_allowed, MAX_GF_INTERVAL);
}
static void define_customized_gf_group_structure(AV1_COMP *cpi) {
static void define_customized_gf_group_structure(
AV1_COMP *cpi, const EncodeFrameParams *const frame_params) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
const int key_frame = frame_params->frame_type == KEY_FRAME;
assert(rc->baseline_gf_interval >= MIN_GF_INTERVAL &&
rc->baseline_gf_interval <=
@@ -1796,142 +1785,11 @@ static void define_customized_gf_group_structure(AV1_COMP *cpi) {
// It is an example of how to define a GF structure manually. The function will
// result in exactly the same GF group structure as
// define_customized_gf_group_structure() when rc->baseline_gf_interval == 4
#if USE_MANUAL_GF4_STRUCT
#define GF_INTERVAL_4 4
static const unsigned char gf4_multi_layer_params[][GF_FRAME_PARAMS] = {
{
// gf_group->index == 0 (Frame 0)
// It can also be KEY frame. Will assign the proper value
// in define_gf_group_structure
OVERLAY_UPDATE, // update_type (default value)
0, // arf_src_offset
0, // arf_pos_in_gf
0 // arf_update_idx
},
{
// gf_group->index == 1 (Frame 4)
ARF_UPDATE, // update_type
GF_INTERVAL_4 - 1, // arf_src_offset
0, // arf_pos_in_gf
0 // arf_update_idx
},
{
// gf_group->index == 2 (Frame 2)
INTNL_ARF_UPDATE, // update_type
(GF_INTERVAL_4 >> 1) - 1, // arf_src_offset
0, // arf_pos_in_gf
0 // arf_update_idx
},
{
// gf_group->index == 3 (Frame 1)
LAST_BIPRED_UPDATE, // update_type
0, // arf_src_offset
0, // arf_pos_in_gf
0 // arf_update_idx
},
{
// gf_group->index == 4 (Frame 2 - OVERLAY)
INTNL_OVERLAY_UPDATE, // update_type
0, // arf_src_offset
2, // arf_pos_in_gf
0 // arf_update_idx
},
{
// gf_group->index == 5 (Frame 3)
LF_UPDATE, // update_type
0, // arf_src_offset
0, // arf_pos_in_gf
1 // arf_update_idx
}
};
static int define_gf_group_structure_4(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
assert(rc->baseline_gf_interval == GF_INTERVAL_4);
const int gf_update_frames = rc->baseline_gf_interval + 2;
int frame_index;
for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
int param_idx = 0;
gf_group->bidir_pred_enabled[frame_index] = 0;
if (frame_index == 0) {
// gf_group->arf_src_offset[frame_index] = 0;
gf_group->brf_src_offset[frame_index] = 0;
gf_group->bidir_pred_enabled[frame_index] = 0;
// For key frames the frame target rate is already set and it
// is also the golden frame.
if (key_frame) continue;
gf_group->update_type[frame_index] =
gf4_multi_layer_params[frame_index][param_idx++];
if (rc->source_alt_ref_active) {
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
} else {
gf_group->update_type[frame_index] = GF_UPDATE;
}
param_idx++;
} else {
gf_group->update_type[frame_index] =
gf4_multi_layer_params[frame_index][param_idx++];
}
// setup other parameters
gf_group->rf_level[frame_index] =
update_type_2_rf_level(gf_group->update_type[frame_index]);
// == arf_src_offset ==
gf_group->arf_src_offset[frame_index] =
gf4_multi_layer_params[frame_index][param_idx++];
// == arf_pos_in_gf ==
gf_group->arf_pos_in_gf[frame_index] =
gf4_multi_layer_params[frame_index][param_idx++];
// == arf_update_idx ==
gf_group->brf_src_offset[frame_index] =
gf4_multi_layer_params[frame_index][param_idx];
}
// NOTE: We need to configure the frame at the end of the sequence + 1 that
// will be the start frame for the next group. Otherwise prior to the
// call to av1_rc_get_second_pass_params() the data will be undefined.
gf_group->arf_update_idx[frame_index] = 0;
gf_group->arf_ref_idx[frame_index] = 0;
if (rc->source_alt_ref_pending) {
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
} else {
gf_group->update_type[frame_index] = GF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
}
gf_group->bidir_pred_enabled[frame_index] = 0;
gf_group->brf_src_offset[frame_index] = 0;
// This value is only used for INTNL_OVERLAY_UPDATE
gf_group->arf_pos_in_gf[frame_index] = 0;
return gf_update_frames;
}
#endif // USE_MANUAL_GF4_STRUCT
#endif // USE_SYMM_MULTI_LAYER
static void define_gf_group_structure(AV1_COMP *cpi) {
static void define_gf_group_structure(
AV1_COMP *cpi, const EncodeFrameParams *const frame_params) {
RATE_CONTROL *const rc = &cpi->rc;
#if USE_SYMM_MULTI_LAYER
const int max_pyr_height = cpi->oxcf.gf_max_pyr_height;
const int valid_customized_gf_length =
max_pyr_height >= MIN_PYRAMID_LVL && max_pyr_height <= MAX_PYRAMID_LVL &&
@@ -1940,24 +1798,18 @@ static void define_gf_group_structure(AV1_COMP *cpi) {
// used the new structure only if extra_arf is allowed
if (valid_customized_gf_length && rc->source_alt_ref_pending &&
cpi->extra_arf_allowed > 0) {
#if USE_MANUAL_GF4_STRUCT
if (rc->baseline_gf_interval == 4)
define_gf_group_structure_4(cpi);
else
#endif
define_customized_gf_group_structure(cpi);
define_customized_gf_group_structure(cpi, frame_params);
cpi->new_bwdref_update_rule = 1;
return;
} else {
cpi->new_bwdref_update_rule = 0;
}
#endif
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
int i;
int frame_index = 0;
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
const int key_frame = frame_params->frame_type == KEY_FRAME;
// The use of bi-predictive frames are only enabled when following 3
// conditions are met:
@@ -2168,35 +2020,28 @@ static void define_gf_group_structure(AV1_COMP *cpi) {
gf_group->brf_src_offset[frame_index] = 0;
}
#if USE_SYMM_MULTI_LAYER
#define NEW_MULTI_LVL_BOOST_VBR_ALLOC 1
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
#define LEAF_REDUCTION_FACTOR 0.75
static double lvl_budget_factor[MAX_PYRAMID_LVL - 1][MAX_PYRAMID_LVL - 1] = {
{ 1.0, 0.0, 0.0 }, { 0.6, 0.4, 0 }, { 0.45, 0.35, 0.20 }
};
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
#endif // USE_SYMM_MULTI_LAYER
static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
double group_error, int gf_arf_bits) {
static void allocate_gf_group_bits(
AV1_COMP *cpi, int64_t gf_group_bits, double group_error, int gf_arf_bits,
const EncodeFrameParams *const frame_params) {
RATE_CONTROL *const rc = &cpi->rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
int i;
int frame_index = 0;
int key_frame;
const int key_frame = frame_params->frame_type == KEY_FRAME;
const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
int64_t total_group_bits = gf_group_bits;
int ext_arf_boost[MAX_EXT_ARFS];
define_gf_group_structure(cpi);
define_gf_group_structure(cpi, frame_params);
av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
// For key frames the frame target rate is already set and it
// is also the golden frame.
// === [frame_index == 0] ===
@@ -2232,13 +2077,9 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
}
}
#if USE_SYMM_MULTI_LAYER
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
// Save.
const int tmp_frame_index = frame_index;
int budget_reduced_from_leaf_level = 0;
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
#endif // USE_SYMM_MULTI_LAYER
// Allocate bits to the other frames in the group.
const int normal_frames =
@@ -2269,7 +2110,6 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
// TODO(zoeliu): To investigate whether the allocated bits on
// BIPRED_UPDATE frames need to be further adjusted.
gf_group->bit_allocation[frame_index] = target_frame_size;
#if USE_SYMM_MULTI_LAYER
} else if (cpi->new_bwdref_update_rule &&
gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE) {
assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL &&
@@ -2280,23 +2120,16 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->bit_allocation[arf_pos] = target_frame_size;
// Note: Boost, if needed, is added in the next loop.
#endif // USE_SYMM_MULTI_LAYER
} else {
assert(gf_group->update_type[frame_index] == LF_UPDATE ||
gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
gf_group->bit_allocation[frame_index] = target_frame_size;
#if MULTI_LVL_BOOST_VBR_CQ
if (cpi->new_bwdref_update_rule) {
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
const int this_budget_reduction =
(int)(target_frame_size * LEAF_REDUCTION_FACTOR);
gf_group->bit_allocation[frame_index] -= this_budget_reduction;
budget_reduced_from_leaf_level += this_budget_reduction;
#else
gf_group->bit_allocation[frame_index] -= (target_frame_size >> 1);
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
}
#endif // MULTI_LVL_BOOST_VBR_CQ
}
++frame_index;
@@ -2308,8 +2141,6 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
}
}
#if USE_SYMM_MULTI_LAYER
#if MULTI_LVL_BOOST_VBR_CQ
if (budget_reduced_from_leaf_level > 0) {
// Restore.
frame_index = tmp_frame_index;
@@ -2323,16 +2154,11 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
const int arf_pos = gf_group->arf_pos_in_gf[frame_index];
const int this_lvl = gf_group->pyramid_level[arf_pos];
const int dist2top = gf_group->pyramid_height - 1 - this_lvl;
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
const double lvl_boost_factor =
lvl_budget_factor[gf_group->pyramid_height - 2][dist2top];
const int extra_size =
(int)(budget_reduced_from_leaf_level * lvl_boost_factor /
gf_group->pyramid_lvl_nodes[this_lvl]);
#else
const int target_frame_size = gf_group->bit_allocation[arf_pos];
const int extra_size = target_frame_size >> dist2top;
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
gf_group->bit_allocation[arf_pos] += extra_size;
}
++frame_index;
@@ -2344,14 +2170,8 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
}
}
}
#endif // MULTI_LVL_BOOST_VBR_CQ
#endif // USE_SYMM_MULTI_LAYER
#if USE_SYMM_MULTI_LAYER
if (cpi->new_bwdref_update_rule == 0 && rc->source_alt_ref_pending) {
#else
if (rc->source_alt_ref_pending) {
#endif
if (cpi->num_extra_arfs) {
// NOTE: For bit allocation, move the allocated bits associated with
// INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
@@ -2379,12 +2199,11 @@ static INLINE int is_almost_static(double gf_zero_motion, int kf_zero_motion) {
(kf_zero_motion >= STATIC_KF_GROUP_THRESH);
}
#if CONFIG_FIX_GF_LENGTH
#define ARF_ABS_ZOOM_THRESH 4.4
#endif // CONFIG_FIX_GF_LENGTH
// Analyse and define a gf/arf group.
static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
const EncodeFrameParams *const frame_params) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
AV1EncoderConfig *const oxcf = &cpi->oxcf;
@@ -2394,10 +2213,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int i;
double boost_score = 0.0;
#if !CONFIG_FIX_GF_LENGTH
double old_boost_score = 0.0;
int active_max_gf_interval;
#endif // !CONFIG_FIX_GF_LENGTH
int active_min_gf_interval;
double gf_group_err = 0.0;
#if GROUP_ADAPTIVE_MAXQ
@@ -2427,14 +2242,15 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int64_t gf_group_bits;
double gf_group_error_left;
int gf_arf_bits;
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
const int is_intra_only = frame_params->frame_type == KEY_FRAME ||
frame_params->frame_type == INTRA_ONLY_FRAME;
const int arf_active_or_kf = is_intra_only || rc->source_alt_ref_active;
cpi->extra_arf_allowed = 1;
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
if (!is_intra_only) {
av1_zero(twopass->gf_group);
}
@@ -2462,35 +2278,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const double mv_ratio_accumulator_thresh =
(cpi->initial_height + cpi->initial_width) / 4.0;
#if CONFIG_FIX_GF_LENGTH
// TODO(urvang): Try the 'else' like logic to vary min and max interval.
// TODO(urvang): Try logic to vary min and max interval based on q.
active_min_gf_interval = rc->min_gf_interval;
#else
// Set a maximum and minimum interval for the GF group.
// If the image appears almost completely static we can extend beyond this.
{
int int_max_q = (int)(av1_convert_qindex_to_q(
twopass->active_worst_quality, cpi->common.seq_params.bit_depth));
int int_lbq = (int)(av1_convert_qindex_to_q(
rc->last_boosted_qindex, cpi->common.seq_params.bit_depth));
active_min_gf_interval = rc->min_gf_interval + AOMMIN(2, int_max_q / 200);
if (active_min_gf_interval > rc->max_gf_interval)
active_min_gf_interval = rc->max_gf_interval;
// The value chosen depends on the active Q range. At low Q we have
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
active_max_gf_interval = 12 + AOMMIN(4, (int_lbq / 6));
// We have: active_min_gf_interval <= rc->max_gf_interval
if (active_max_gf_interval < active_min_gf_interval)
active_max_gf_interval = active_min_gf_interval;
else if (active_max_gf_interval > rc->max_gf_interval)
active_max_gf_interval = rc->max_gf_interval;
}
#endif // CONFIG_FIX_GF_LENGTH
double avg_sr_coded_error = 0;
double avg_raw_err_stdev = 0;
@@ -2552,7 +2341,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
boost_score +=
decay_accumulator *
calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
#if CONFIG_FIX_GF_LENGTH
// If almost totally static, we will not use the fixed GF length later,
// so we can continue for more frames.
if (i >= (av1_rc_get_fixed_gf_length(oxcf->gf_max_pyr_height) + 1) &&
@@ -2570,39 +2358,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH)) {
break;
}
#else
// Break out conditions.
// Break at maximum of active_max_gf_interval unless almost totally static.
//
// Note that the addition of a test of rc->source_alt_ref_active is
// deliberate. The effect of this is that after a normal altref group even
// if the material is static there will be one normal length GF group
// before allowing longer GF groups. The reason for this is that in cases
// such as slide shows where slides are separated by a complex transition
// such as a fade, the arf group spanning the transition may not be coded
// at a very high quality and hence this frame (with its overlay) is a
// poor golden frame to use for an extended group.
if ((i >= (active_max_gf_interval + arf_active_or_kf) &&
((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval + arf_active_or_kf) &&
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
// If GF group interval is < 12, we force it to be 8. Otherwise,
// if it is >= 12, we keep it as is.
// NOTE: 'i' is 1 more than the GF group interval candidate that is being
// checked.
if (i == (8 + 1) || i >= (12 + 1)) {
boost_score = old_boost_score;
break;
}
}
old_boost_score = boost_score;
#endif // CONFIG_FIX_GF_LENGTH
*this_frame = next_frame;
}
twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
@@ -2638,7 +2393,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#define REDUCE_GF_LENGTH_TO_KEY_THRESH 9
#define REDUCE_GF_LENGTH_BY 1
int alt_offset = 0;
#if REDUCE_LAST_GF_LENGTH
// The length reduction strategy is tweaked using AOM_Q mode, and doesn't work
// for VBR mode.
// Also, we don't have to do adjustment for lossless mode.
@@ -2670,7 +2424,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
i -= roll_back;
}
}
#endif // REDUCE_LAST_GF_LENGTH
// Should we use the alternate reference frame.
if (use_alt_ref) {
@@ -2713,7 +2466,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
}
#if REDUCE_LAST_ALT_BOOST
#define LAST_ALR_BOOST_FACTOR 0.2f
rc->arf_boost_factor = 1.0;
if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf)) {
@@ -2723,7 +2475,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
}
}
#endif
if (!cpi->extra_arf_allowed) {
cpi->num_extra_arfs = 0;
@@ -2732,7 +2483,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Note: When new pyramid structure is used through
// 'define_customized_gf_group_structure()' function, this value is
// overridden.
#if USE_SYMM_MULTI_LAYER
if (rc->baseline_gf_interval == MIN_GF_INTERVAL &&
rc->source_alt_ref_pending) {
cpi->num_extra_arfs = 1;
@@ -2741,18 +2491,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->source_alt_ref_pending,
oxcf->gf_max_pyr_height);
}
#else
cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
rc->source_alt_ref_pending,
oxcf->gf_max_pyr_height);
#endif // USE_SYMM_MULTI_LAYER
}
#if !USE_SYMM_MULTI_LAYER
// Currently at maximum two extra ARFs' are allowed
assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
#endif
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
rc->bipred_group_interval = BFG_INTERVAL;
@@ -2814,20 +2554,21 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// also a key frame in which case it has already been accounted for.
if (rc->source_alt_ref_pending) {
gf_group_error_left = gf_group_err - mod_frame_err;
} else if (is_key_frame == 0) {
} else if (!is_intra_only) {
gf_group_error_left = gf_group_err - gf_first_frame_err;
} else {
gf_group_error_left = gf_group_err;
}
// Allocate bits to each of the frames in the GF group.
allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits,
frame_params);
// Reset the file position.
reset_fpf_position(twopass, start_pos);
// Calculate a section intra ratio used in setting max loop filter.
if (cpi->common.current_frame.frame_type != KEY_FRAME) {
if (frame_params->frame_type != KEY_FRAME) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
}
@@ -2966,7 +2707,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
av1_zero(next_frame);
cpi->common.current_frame.frame_type = KEY_FRAME;
rc->frames_since_key = 0;
// Reset the GF group data structures.
@@ -3195,51 +2935,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->modified_error_left -= kf_group_err;
}
// Sets the reference-buffer refresh flags on |cpi| for a frame of the given
// |update_type|.
//
// Exactly one buffer is refreshed:
//   ARF_UPDATE        -> alt_ref
//   INTNL_ARF_UPDATE  -> alt2_ref
//   BIPRED_UPDATE     -> bwd_ref (and rc.is_bwd_ref_frame is raised)
//   anything else     -> last
// The golden buffer is never refreshed here.
void av1_configure_buffer_updates_firstpass(AV1_COMP *cpi,
                                            FRAME_UPDATE_TYPE update_type) {
  RATE_CONTROL *const rate_ctrl = &cpi->rc;

  const int is_arf = (update_type == ARF_UPDATE);
  const int is_intnl_arf = (update_type == INTNL_ARF_UPDATE);
  const int is_bipred = (update_type == BIPRED_UPDATE);

  // LAST is refreshed only when none of the special update types applies.
  cpi->refresh_last_frame = !(is_arf || is_intnl_arf || is_bipred);
  cpi->refresh_golden_frame = 0;
  cpi->refresh_bwd_ref_frame = is_bipred;
  cpi->refresh_alt2_ref_frame = is_intnl_arf;
  cpi->refresh_alt_ref_frame = is_arf;

  rate_ctrl->is_bwd_ref_frame = is_bipred;

  // The source-frame markers are cleared only for the ARF flavours; for every
  // other update type they keep whatever value they already had.
  if (is_arf || is_intnl_arf) rate_ctrl->is_src_frame_alt_ref = 0;
  if (is_intnl_arf) rate_ctrl->is_src_frame_ext_arf = 0;
}
static int is_skippable_frame(const AV1_COMP *cpi) {
// If the current frame does not have non-zero motion vector detected in the
// first pass, and so do its previous and forward frames, then this frame
@@ -3259,7 +2954,8 @@ static int is_skippable_frame(const AV1_COMP *cpi) {
twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
}
void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
void av1_rc_get_second_pass_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params) {
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
RATE_CONTROL *const rc = &cpi->rc;
@@ -3278,16 +2974,16 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
// advance the input pointer as we already have what we need.
if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
av1_configure_buffer_updates(cpi);
av1_configure_buffer_updates(cpi, gf_group->update_type[gf_group->index]);
target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
if (cpi->no_show_kf) {
assert(gf_group->update_type[gf_group->index] == ARF_UPDATE);
current_frame->frame_type = KEY_FRAME;
frame_params->frame_type = KEY_FRAME;
} else {
current_frame->frame_type = INTER_FRAME;
frame_params->frame_type = INTER_FRAME;
}
// Do the firstpass stats indicate that this frame is skippable for the
@@ -3342,16 +3038,17 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
FIRSTPASS_STATS this_frame_copy;
this_frame_copy = this_frame;
frame_params->frame_type = KEY_FRAME;
// Define next KF group and assign bits to it.
find_next_key_frame(cpi, &this_frame);
this_frame = this_frame_copy;
} else {
current_frame->frame_type = INTER_FRAME;
frame_params->frame_type = INTER_FRAME;
}
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
define_gf_group(cpi, &this_frame);
define_gf_group(cpi, &this_frame, frame_params);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -3369,7 +3066,7 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
#endif
}
av1_configure_buffer_updates(cpi);
av1_configure_buffer_updates(cpi, gf_group->update_type[gf_group->index]);
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
@@ -3379,7 +3076,7 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
target_rate = gf_group->bit_allocation[gf_group->index];
if (cpi->common.current_frame.frame_type == KEY_FRAME)
if (frame_params->frame_type == KEY_FRAME)
target_rate = av1_rc_clamp_iframe_target_size(cpi, target_rate);
else
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
+4 -6
View File
@@ -114,12 +114,10 @@ typedef struct {
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
#if USE_SYMM_MULTI_LAYER
unsigned char arf_pos_in_gf[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char pyramid_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char pyramid_height;
unsigned char pyramid_lvl_nodes[MAX_PYRAMID_LVL];
#endif // USE_SYMM_MULTI_LAYER
unsigned char brf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char bidir_pred_enabled[MAX_STATIC_GF_GROUP_LENGTH + 1];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
@@ -173,16 +171,16 @@ typedef struct {
} TWO_PASS;
struct AV1_COMP;
struct EncodeFrameParams;
void av1_init_first_pass(struct AV1_COMP *cpi);
void av1_rc_get_first_pass_params(struct AV1_COMP *cpi);
void av1_first_pass(struct AV1_COMP *cpi, const struct lookahead_entry *source);
void av1_first_pass(struct AV1_COMP *cpi, const int64_t ts_duration);
void av1_end_first_pass(struct AV1_COMP *cpi);
void av1_init_second_pass(struct AV1_COMP *cpi);
void av1_rc_get_second_pass_params(struct AV1_COMP *cpi);
void av1_configure_buffer_updates_firstpass(struct AV1_COMP *cpi,
FRAME_UPDATE_TYPE update_type);
void av1_rc_get_second_pass_params(
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
// Post encode update of the rate control parameters for 2-pass
void av1_twopass_postencode_update(struct AV1_COMP *cpi);
+2 -2
View File
@@ -71,8 +71,8 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
xd->mi[0]->mv[0] = x->best_mv;
xd->mi[0]->ref_frame[1] = NONE_FRAME;
av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
BLOCK_16X16);
av1_enc_build_inter_predictor(&cpi->common, xd, mb_row, mb_col, NULL,
BLOCK_16X16, AOM_PLANE_Y, AOM_PLANE_Y);
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
+7 -7
View File
@@ -336,7 +336,7 @@ static unsigned int setup_center_error(
int *mvcost[2], unsigned int *sse1, int *distortion) {
unsigned int besterr;
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
if (mask) {
@@ -641,7 +641,7 @@ static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
int mask_stride, int invert_mask, int w, int h,
unsigned int *sse, int subpel_search) {
unsigned int besterr;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
if (second_pred != NULL) {
@@ -899,7 +899,8 @@ unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
unsigned int mse;
unsigned int sse;
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
AOM_PLANE_Y, AOM_PLANE_Y);
mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, x->mv_cost_stack,
x->errorperbit);
@@ -2213,9 +2214,8 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
: av1_get_ref_frame_hash_map(&cpi->common,
x->e_mbd.mi[0]->ref_frame[0]);
av1_get_block_hash_value(
what, what_stride, block_width, &hash_value1, &hash_value2,
x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
&hash_value2, is_cur_buf_hbd(&x->e_mbd), x);
const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
// for intra, at least one matching can be found, itself.
@@ -2334,7 +2334,7 @@ static int upsampled_obmc_pref_error(
unsigned int besterr;
DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
+71
View File
@@ -3784,6 +3784,77 @@ static const NN_CONFIG simple_motion_search_prune_part_nn_config_8 = {
#undef NUM_LAYER_0_UNITS_8
#undef NUM_LOGITS_8
// Number of input features associated with each two-pass partition weight
// vector defined below.
#define FEATURE_SIZE 19
// Weight vectors named for the "split" partition decision, one table per
// block-size suffix (128, 64, 32, 16, 8). Each table holds FEATURE_SIZE + 1
// floats.
// NOTE(review): the extra trailing element is presumably a bias/offset term
// that follows the FEATURE_SIZE per-feature weights -- confirm against the
// code that consumes these tables.
static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f,
0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f,
0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f,
};
static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f,
-1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f,
-0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f,
0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f,
};
static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f,
-0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f,
-1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f,
0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f,
};
static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f,
-0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f,
-1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f,
-0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f,
};
static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f,
-0.658074f, -1.960362f, 0.000000f, -0.403080f, 0.282699f,
-2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f,
0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f,
};
// Weight vectors named for the "none" (no further partitioning) decision,
// laid out the same way as the split tables above: one table per block-size
// suffix, FEATURE_SIZE + 1 floats each.
static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
-1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f,
-0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f,
3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f,
-0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f,
};
static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
-1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f,
-4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f,
1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f,
-0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f,
};
static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
-2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f,
-2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f,
0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f,
-0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f,
};
static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
-1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f,
-3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f,
0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f,
-0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f,
};
static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
-1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f,
-1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f,
0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f,
0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
};
#ifdef __cplusplus
} // extern "C"
#endif
+18 -165
View File
@@ -29,6 +29,7 @@
#include "av1/common/seg_common.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encode_strategy.h"
#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
@@ -558,13 +559,11 @@ static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
arfgf_low_motion_minq, arfgf_high_motion_minq);
}
#if REDUCE_LAST_ALT_BOOST
// Returns entry |q| of the arfgf_high_motion_minq table chosen for
// |bit_depth|.
// No bounds check is performed on |q| here.
static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) {
int *arfgf_high_motion_minq;
// NOTE(review): ASSIGN_MINQ_TABLE is defined outside this view; it is
// expected to point the local at the table variant matching |bit_depth|, and
// may depend on the local's exact name -- confirm before renaming.
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
return arfgf_high_motion_minq[q];
}
#endif
static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
@@ -965,12 +964,8 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
const int bit_depth = cm->seq_params.bit_depth;
ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
#if CUSTOMIZED_GF
const int is_intrl_arf_boost =
gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
#else
const int is_intrl_arf_boost = cpi->refresh_alt2_ref_frame;
#endif // CUSTOMIZED_GF
if (frame_is_intra_only(cm)) {
if (rc->frames_to_key == 1 && oxcf->rc_mode == AOM_Q) {
@@ -1053,17 +1048,14 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
(is_intrl_arf_boost && !cpi->new_bwdref_update_rule)) {
#if REDUCE_LAST_ALT_BOOST
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
const int boost = min_boost - active_best_quality;
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
}
#endif // REDUCE_LAST_ALT_BOOST
*arf_q = active_best_quality;
} else if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
assert(rc->arf_q >= 0); // Ensure it is set to a valid value.
@@ -1074,7 +1066,6 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
++this_height;
}
}
#endif // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
} else if (oxcf->rc_mode == AOM_Q) {
if (!cpi->refresh_alt_ref_frame && !is_intrl_arf_boost) {
active_best_quality = cq_level;
@@ -1082,17 +1073,14 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
active_best_quality = get_gf_active_quality(rc, q, bit_depth);
*arf_q = active_best_quality;
#if REDUCE_LAST_ALT_BOOST
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
const int boost = min_boost - active_best_quality;
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
#endif
} else {
assert(rc->arf_q >= 0); // Ensure it is set to a valid value.
active_best_quality = rc->arf_q;
}
#if USE_SYMM_MULTI_LAYER
if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
int this_height = gf_group_pyramid_level(cpi);
while (this_height < gf_group->pyramid_height) {
@@ -1100,24 +1088,18 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
++this_height;
}
} else {
#endif
// Modify best quality for second level arfs. For mode AOM_Q this
// becomes the baseline frame q.
if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
active_best_quality = (active_best_quality + cq_level + 1) / 2;
#if USE_SYMM_MULTI_LAYER
}
#endif
}
} else {
active_best_quality = get_gf_active_quality(rc, q, bit_depth);
#if REDUCE_LAST_ALT_BOOST
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
const int boost = min_boost - active_best_quality;
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
#endif
#if USE_SYMM_MULTI_LAYER
if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
int this_height = gf_group_pyramid_level(cpi);
while (this_height < gf_group->pyramid_height) {
@@ -1126,7 +1108,6 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
++this_height;
}
}
#endif
}
} else {
if (oxcf->rc_mode == AOM_Q) {
@@ -1293,16 +1274,12 @@ static void update_alt_ref_frame_stats(AV1_COMP *cpi) {
static void update_golden_frame_stats(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
#if CUSTOMIZED_GF
const TWO_PASS *const twopass = &cpi->twopass;
const GF_GROUP *const gf_group = &twopass->gf_group;
const int is_intrnl_arf =
cpi->oxcf.pass == 2
? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
: cpi->refresh_alt2_ref_frame;
#else
const int is_intnl_arf = cpi->refresh_alt2_ref_frame;
#endif
// Update the Golden frame usage counts.
// NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame,
@@ -1328,127 +1305,6 @@ static void update_golden_frame_stats(AV1_COMP *cpi) {
}
}
// Writes all five reference-buffer refresh flags on |cpi| in one call.
static void set_buffer_refresh_flags(AV1_COMP *cpi, int last, int golden,
                                     int bwd, int alt2, int alt) {
  cpi->refresh_last_frame = last;
  cpi->refresh_golden_frame = golden;
  cpi->refresh_bwd_ref_frame = bwd;
  cpi->refresh_alt2_ref_frame = alt2;
  cpi->refresh_alt_ref_frame = alt;
}

// Define the reference buffers that will be updated post encode.
//
// The decision is driven by the update type stored at the current index of
// the two-pass GF group. The rate-control "source type" markers are cleared
// first and then raised again for the update types that need them.
void av1_configure_buffer_updates(AV1_COMP *cpi) {
  const TWO_PASS *const two_pass = &cpi->twopass;
  RATE_CONTROL *const rate_ctrl = &cpi->rc;

  // NOTE(weitinglin): Should we define another function to take care of
  // cpi->rc.is_$Source_Type to make this function as it is in the comment?
  rate_ctrl->is_src_frame_alt_ref = 0;
  rate_ctrl->is_bwd_ref_frame = 0;
  rate_ctrl->is_last_bipred_frame = 0;
  rate_ctrl->is_bipred_frame = 0;
  rate_ctrl->is_src_frame_ext_arf = 0;

  // Flag order below: last, golden, bwd_ref, alt2_ref, alt_ref.
  switch (two_pass->gf_group.update_type[two_pass->gf_group.index]) {
    case KF_UPDATE: set_buffer_refresh_flags(cpi, 1, 1, 1, 1, 1); break;

    case LF_UPDATE: set_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0); break;

    case GF_UPDATE:
      // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
      //               needed.
      set_buffer_refresh_flags(cpi, 1, 1, 0, 0, 0);
      break;

    case OVERLAY_UPDATE:
      set_buffer_refresh_flags(cpi, 0, 1, 0, 0, 0);
      rate_ctrl->is_src_frame_alt_ref = 1;
      break;

    case ARF_UPDATE:
      // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
      set_buffer_refresh_flags(cpi, 0, 0, 0, 0, 1);
      break;

    case BRF_UPDATE:
      set_buffer_refresh_flags(cpi, 0, 0, 1, 0, 0);
      rate_ctrl->is_bwd_ref_frame = 1;
      break;

    case LAST_BIPRED_UPDATE:
      set_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rate_ctrl->is_last_bipred_frame = 1;
      break;

    case BIPRED_UPDATE:
      set_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rate_ctrl->is_bipred_frame = 1;
      break;

    case INTNL_OVERLAY_UPDATE:
      set_buffer_refresh_flags(cpi, 1, 0, 0, 0, 0);
      rate_ctrl->is_src_frame_alt_ref = 1;
      rate_ctrl->is_src_frame_ext_arf = 1;
      break;

    case INTNL_ARF_UPDATE: {
      // With the new bwdref update rule enabled the internal ARF refreshes
      // the bwd_ref buffer; otherwise it refreshes alt2_ref.
#if USE_SYMM_MULTI_LAYER
      const int goes_to_bwd = (cpi->new_bwdref_update_rule == 1);
#else
      const int goes_to_bwd = 0;
#endif
      set_buffer_refresh_flags(cpi, 0, 0, goes_to_bwd, !goes_to_bwd, 0);
      break;
    }

    // Unknown update type: refresh flags are deliberately left untouched.
    default: assert(0); break;
  }
}
void av1_estimate_qp_gop(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int gop_length = cpi->rc.baseline_gf_interval;
@@ -1463,30 +1319,28 @@ void av1_estimate_qp_gop(AV1_COMP *cpi) {
cpi->twopass.gf_group.index = idx;
rc_set_frame_target(cpi, target_rate, cm->width, cm->height);
av1_configure_buffer_updates(cpi);
av1_configure_buffer_updates(
cpi, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]);
tpl_frame->base_qindex = rc_pick_q_and_bounds_two_pass(
cpi, cm->width, cm->height, &bottom_index, &top_index, &arf_q);
tpl_frame->base_qindex = AOMMAX(tpl_frame->base_qindex, 1);
}
// Reset the actual index and frame update
cpi->twopass.gf_group.index = gf_index;
av1_configure_buffer_updates(cpi);
av1_configure_buffer_updates(
cpi, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]);
}
void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
const AV1_COMMON *const cm = &cpi->common;
const CurrentFrame *const current_frame = &cm->current_frame;
RATE_CONTROL *const rc = &cpi->rc;
#if CUSTOMIZED_GF
const TWO_PASS *const twopass = &cpi->twopass;
const GF_GROUP *const gf_group = &twopass->gf_group;
const int is_intrnl_arf =
cpi->oxcf.pass == 2
? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
: cpi->refresh_alt2_ref_frame;
#else
const int is_intrnl_arf = cpi->refresh_alt2_ref_frame;
#endif
const int qindex = cm->base_qindex;
@@ -1618,7 +1472,8 @@ static int calc_iframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
return av1_rc_clamp_iframe_target_size(cpi, target);
}
void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
CurrentFrame *const current_frame = &cm->current_frame;
@@ -1632,44 +1487,41 @@ void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
(current_frame->frame_number == 0 ||
(cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && 0))) {
current_frame->frame_type = KEY_FRAME;
frame_params->frame_type = KEY_FRAME;
rc->this_key_frame_forced =
current_frame->frame_number != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
rc->source_alt_ref_active = 0;
} else {
current_frame->frame_type = INTER_FRAME;
frame_params->frame_type = INTER_FRAME;
if (sframe_enabled) {
if (altref_enabled) {
if (sframe_mode == 1) {
// sframe_mode == 1: insert sframe if it matches altref frame.
if (current_frame->frame_number % sframe_dist == 0 &&
current_frame->frame_type != KEY_FRAME &&
current_frame->frame_number != 0 && cpi->refresh_alt_ref_frame) {
current_frame->frame_type = S_FRAME;
frame_params->frame_type = S_FRAME;
}
} else {
// sframe_mode != 1: if sframe will be inserted at the next available
// altref frame
if (current_frame->frame_number % sframe_dist == 0 &&
current_frame->frame_type != KEY_FRAME &&
current_frame->frame_number != 0) {
rc->sframe_due = 1;
}
if (rc->sframe_due && cpi->refresh_alt_ref_frame) {
current_frame->frame_type = S_FRAME;
frame_params->frame_type = S_FRAME;
rc->sframe_due = 0;
}
}
} else {
if (current_frame->frame_number % sframe_dist == 0 &&
current_frame->frame_type != KEY_FRAME &&
current_frame->frame_number != 0) {
current_frame->frame_type = S_FRAME;
frame_params->frame_type = S_FRAME;
}
}
}
@@ -1692,7 +1544,7 @@ void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
av1_cyclic_refresh_update_parameters(cpi);
if (current_frame->frame_type == KEY_FRAME)
if (frame_params->frame_type == KEY_FRAME)
target = calc_iframe_target_size_one_pass_vbr(cpi);
else
target = calc_pframe_target_size_one_pass_vbr(cpi);
@@ -1758,7 +1610,8 @@ static int calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
return av1_rc_clamp_iframe_target_size(cpi, target);
}
void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
CurrentFrame *const current_frame = &cm->current_frame;
@@ -1767,14 +1620,14 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
if ((current_frame->frame_number == 0 ||
(cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && 0))) {
current_frame->frame_type = KEY_FRAME;
frame_params->frame_type = KEY_FRAME;
rc->this_key_frame_forced =
current_frame->frame_number != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
rc->source_alt_ref_active = 0;
} else {
current_frame->frame_type = INTER_FRAME;
frame_params->frame_type = INTER_FRAME;
}
if (rc->frames_till_gf_update_due == 0) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
@@ -1795,7 +1648,7 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
av1_cyclic_refresh_update_parameters(cpi);
if (current_frame->frame_type == KEY_FRAME)
if (frame_params->frame_type == KEY_FRAME)
target = calc_iframe_target_size_one_pass_cbr(cpi);
else
target = calc_pframe_target_size_one_pass_cbr(cpi);
+5 -23
View File
@@ -34,27 +34,10 @@ extern "C" {
// The maximum duration of a GF group that is static (e.g. a slide show).
#define MAX_STATIC_GF_GROUP_LENGTH 250
#define CUSTOMIZED_GF 1
#if CONFIG_FIX_GF_LENGTH
// Minimum and maximum height for the new pyramid structure.
// (Old structure supports height = 1, but does NOT support height = 4).
#define MIN_PYRAMID_LVL 2
#define MAX_PYRAMID_LVL 4
#define USE_SYMM_MULTI_LAYER 1
#define REDUCE_LAST_ALT_BOOST 1
#define REDUCE_LAST_GF_LENGTH 1
#define MULTI_LVL_BOOST_VBR_CQ 1
#else
#define USE_SYMM_MULTI_LAYER 0
#define REDUCE_LAST_ALT_BOOST 0
#define REDUCE_LAST_GF_LENGTH 0
#define MULTI_LVL_BOOST_VBR_CQ 0
#endif
#if USE_SYMM_MULTI_LAYER
#define USE_MANUAL_GF4_STRUCT 0
#endif
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
@@ -191,9 +174,7 @@ int av1_rc_get_default_min_gf_interval(int width, int height, double framerate);
int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate,
int max_pyr_height);
#if CONFIG_FIX_GF_LENGTH
int av1_rc_get_fixed_gf_length(int max_pyr_height);
#endif // CONFIG_FIX_GF_LENGTH
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
@@ -218,8 +199,11 @@ int av1_rc_get_fixed_gf_length(int max_pyr_height);
// Functions to set parameters for encoding before the actual
// encode_frame_to_data_rate() function.
void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
struct EncodeFrameParams;
void av1_rc_get_one_pass_vbr_params(
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
void av1_rc_get_one_pass_cbr_params(
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
// Post encode update of the rate control parameters based
// on bytes used
@@ -283,8 +267,6 @@ void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height);
int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
void av1_configure_buffer_updates(struct AV1_COMP *cpi);
void av1_estimate_qp_gop(struct AV1_COMP *cpi);
#ifdef __cplusplus
+135 -94
View File
@@ -508,6 +508,17 @@ void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
pcost->base_cost[ctx][4] = 0;
pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
av1_cost_literal(1) -
pcost->base_cost[ctx][0];
pcost->base_cost[ctx][6] =
pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
pcost->base_cost[ctx][7] =
pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
}
for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
@@ -538,6 +549,14 @@ void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
// printf("%5d ", pcost->lps_cost[ctx][i]);
// printf("\n");
}
for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
pcost->lps_cost[ctx][0];
for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
}
}
}
}
}
@@ -698,6 +717,10 @@ static const uint8_t bsize_model_cat_lookup[BLOCK_SIZES_ALL] = {
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2
};
// Maps a normalized SSE value to a distortion-model category index:
// yields 1 when sse_norm is strictly greater than 16.0, and 0 otherwise.
static int sse_norm_model_cat_lookup(double sse_norm) {
  if (sse_norm > 16.0) return 1;
  return 0;
}
static const double interp_rgrid_surf[4][33 * 18] = {
{
29.726102, 30.738006, 25.294088, 25.736759, 41.255961,
@@ -1273,8 +1296,9 @@ static const double interp_dgrid_surf[33 * 18] = {
0.007205, 0.007205, 0.007203, 0.004341, 0.004340, 0.004338,
};
void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
double *rate_f, double *dist_f) {
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
double yl, double *rate_f, double *dist_f) {
(void)sse_norm;
const double x_start = -0.5;
const double x_end = 16.5;
const double x_step = 1.0;
@@ -1283,7 +1307,7 @@ void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
const double y_step = 1.0;
const double epsilon = 1e-6;
const int stride = (int)rint((x_end - x_start) / x_step) + 1;
const int cat = bsize_model_cat_lookup[bsize];
const int rcat = bsize_model_cat_lookup[bsize];
(void)y_end;
xm = AOMMAX(xm, x_start + x_step + epsilon);
@@ -1301,7 +1325,7 @@ void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
const double yo = y - yi;
const double xo = x - xi;
const double *prate = &interp_rgrid_surf[cat][(yi - 1) * stride + (xi - 1)];
const double *prate = &interp_rgrid_surf[rcat][(yi - 1) * stride + (xi - 1)];
const double *pdist = &interp_dgrid_surf[(yi - 1) * stride + (xi - 1)];
*rate_f = interp_bicubic(prate, stride, xo, yo);
*dist_f = interp_bicubic(pdist, stride, xo, yo);
@@ -1311,85 +1335,102 @@ static const double interp_rgrid_curv[4][65] = {
{
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 23.801499, 28.387688, 33.388795, 42.298282,
41.525408, 51.597692, 49.566271, 54.632979, 60.321507,
67.730678, 75.766165, 85.324032, 96.600012, 120.839562,
173.917577, 255.974908, 354.107573, 458.063476, 562.345966,
668.568424, 772.072881, 878.598490, 982.202274, 1082.708946,
1188.037853, 1287.702240, 1395.588773, 1490.825830, 1584.231230,
1691.386090, 1766.822555, 1869.630904, 1926.743565, 2002.949495,
2047.431137, 2138.486068, 2154.743767, 2209.242472, 2278.252010,
2298.028834, 2302.326180, 2293.979995, 2275.826226, 2250.700821,
2221.439725, 2190.878887, 2161.854252, 2137.201768, 2119.757381,
2112.357039, 2117.836689, 2139.032277, 2178.779750, 2239.915056,
},
{
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
11.561347, 12.578139, 14.205101, 16.770584, 19.094853,
21.330863, 23.298907, 26.901921, 34.501017, 57.891733,
112.234763, 194.853189, 288.302032, 380.499422, 472.625309,
560.226809, 647.928463, 734.155122, 817.489721, 906.265783,
999.260562, 1094.489206, 1197.062998, 1293.296825, 1378.926484,
1472.760990, 1552.663779, 1635.196884, 1692.451951, 1759.741063,
1822.162720, 1916.515921, 1966.686071, 2031.647506, 2031.381029,
2067.971335, 2203.662704, 2500.257936, 3019.559830, 3823.371186,
4973.494802, 6531.733478, 8559.890013, 11119.767206, 14273.167855,
18081.894761, 22607.750723, 27912.538538, 34058.061008, 41106.120930,
2047.431137, 2138.486068, 2154.743767, 2209.242472, 2277.593051,
2290.996432, 2307.452938, 2343.567091, 2397.654644, 2469.425868,
2558.591037, 2664.860422, 2787.944296, 2927.552932, 3083.396602,
3255.185579, 3442.630134, 3645.440541, 3863.327072, 4096.000000,
},
{
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 8.998436, 9.439592, 9.731837, 10.865931,
11.561347, 12.578139, 14.205101, 16.770584, 19.094853,
21.330863, 23.298907, 26.901921, 34.501017, 57.891733,
112.234763, 194.853189, 288.302032, 380.499422, 472.625309,
560.226809, 647.928463, 734.155122, 817.489721, 906.265783,
999.260562, 1094.489206, 1197.062998, 1293.296825, 1378.926484,
1472.760990, 1552.663779, 1635.196884, 1692.451951, 1759.741063,
1822.162720, 1916.515921, 1966.686071, 2031.647506, 2033.700134,
2087.847688, 2161.688858, 2242.536028, 2334.023491, 2436.337802,
2549.665519, 2674.193198, 2810.107395, 2957.594666, 3116.841567,
3288.034655, 3471.360486, 3667.005616, 3875.156602, 4096.000000,
},
{
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 2.377584, 2.557185, 2.732445, 2.851114,
3.281800, 3.765589, 4.342578, 5.145582, 5.611038,
6.642238, 7.945977, 11.800522, 17.346624, 37.501413,
87.216800, 165.860942, 253.865564, 332.039345, 408.518863,
478.120452, 547.268590, 616.067676, 680.022540, 753.863541,
834.529973, 919.489191, 1008.264989, 1092.230318, 1173.971886,
1249.514122, 1330.510941, 1399.523249, 1466.923387, 1530.533471,
1586.515722, 1695.197774, 1746.648696, 1837.136959, 1909.056910,
1974.948082, 2063.374132, 2178.496387, 2324.476176, 2505.474827,
2725.653666, 2989.174023, 3300.197225, 3662.884600, 4081.397476,
4559.897180, 5102.545042, 5713.502387, 6396.930546, 7156.990844,
1586.515722, 1695.197774, 1746.648696, 1837.136959, 1909.075485,
1975.074651, 2060.159200, 2155.335095, 2259.762505, 2373.710437,
2497.447898, 2631.243895, 2775.367434, 2930.087523, 3095.673170,
3272.393380, 3460.517161, 3660.313520, 3872.051464, 4096.000000,
},
{
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.614483, 0.842937, 1.050824, 1.326663, 1.717750,
2.530591, 3.582302, 6.995373, 9.973335, 24.042464,
56.598240, 113.680735, 180.018689, 231.050567, 266.101082,
294.957934, 323.326511, 349.434429, 380.443211, 408.171987,
441.214916, 475.716772, 512.900000, 551.186939, 592.364455,
624.527378, 661.940693, 679.185473, 724.800679, 764.781792,
873.050019, 950.299001, 939.292954, 1052.406153, 1030.816617,
1086.316710, 1275.467594, 1671.923018, 2349.336727, 3381.362469,
4841.653990, 6803.865037, 9341.649358, 12528.660698, 16438.552805,
21144.979426, 26721.594308, 33242.051197, 40780.003840, 49409.105984,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.296997, 0.342545, 0.403097, 0.472889,
0.614483, 0.842937, 1.050824, 1.326663, 1.717750,
2.530591, 3.582302, 6.995373, 9.973335, 24.042464,
56.598240, 113.680735, 180.018689, 231.050567, 266.101082,
294.957934, 323.326511, 349.434429, 380.443211, 408.171987,
441.214916, 475.716772, 512.900000, 551.186939, 592.364455,
624.527378, 661.940693, 679.185473, 724.800679, 764.781792,
873.050019, 950.299001, 939.292954, 1052.406153, 1033.893184,
1112.182406, 1219.174326, 1337.296681, 1471.648357, 1622.492809,
1790.093491, 1974.713858, 2176.617364, 2396.067465, 2633.327614,
2888.661266, 3162.331876, 3454.602899, 3765.737789, 4096.000000,
},
};
static const double interp_dgrid_curv[65] = {
14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855,
14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.555776, 14.533692,
14.439920, 14.257791, 13.977230, 13.623229, 13.064884, 12.355411, 11.560773,
10.728960, 9.861975, 8.643612, 6.916021, 5.154769, 3.734940, 2.680051,
1.925506, 1.408410, 1.042223, 0.767641, 0.565392, 0.420116, 0.310427,
0.231711, 0.172999, 0.128293, 0.094992, 0.072171, 0.052972, 0.039354,
0.029555, 0.022857, 0.016832, 0.013297, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
0.000000, 0.000000,
// Distortion-by-SSE grid used by av1_model_rd_curvfit().
// The first index is the category returned by sse_norm_model_cat_lookup()
// (1 when sse_norm > 16.0, otherwise 0). The second index selects one of the
// 65 grid samples that av1_model_rd_curvfit() interpolates with
// interp_cubic().
static const double interp_dgrid_curv[2][65] = {
{
16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
7.487633, 5.688649, 4.267515, 3.196300, 2.434201, 1.834064,
1.369920, 1.035921, 0.775279, 0.574895, 0.427232, 0.314123,
0.233236, 0.171440, 0.128188, 0.092762, 0.067569, 0.049324,
0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
0.000348, 0.000193, 0.000085, 0.000021, 0.000000,
},
{
16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
13.073692, 12.222005, 11.237799, 9.985848, 8.898823, 7.423519,
5.995325, 4.773152, 3.744032, 2.938217, 2.294526, 1.762412,
1.327145, 1.020728, 0.765535, 0.570548, 0.425833, 0.313825,
0.232959, 0.171324, 0.128174, 0.092750, 0.067558, 0.049319,
0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
0.000348, 0.000193, 0.000085, 0.000021, -0.000000,
},
};
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
double *distbysse_f) {
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
double *rate_f, double *distbysse_f) {
const double x_start = -15.5;
const double x_end = 16.5;
const double x_step = 0.5;
const double epsilon = 1e-6;
const int cat = bsize_model_cat_lookup[bsize];
const int rcat = bsize_model_cat_lookup[bsize];
const int dcat = sse_norm_model_cat_lookup(sse_norm);
(void)x_end;
xqr = AOMMAX(xqr, x_start + x_step + epsilon);
@@ -1400,9 +1441,9 @@ void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
assert(xi > 0);
const double *prate = &interp_rgrid_curv[cat][(xi - 1)];
const double *pdist = &interp_dgrid_curv[(xi - 1)];
const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
*rate_f = interp_cubic(prate, xo);
const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
*distbysse_f = interp_cubic(pdist, xo);
}
@@ -1565,7 +1606,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
} else {
rd->thresh_mult[THR_NEARESTMV] = 0;
rd->thresh_mult[THR_NEARESTL2] = 0;
rd->thresh_mult[THR_NEARESTL3] = 0;
rd->thresh_mult[THR_NEARESTL3] = 100;
rd->thresh_mult[THR_NEARESTB] = 0;
rd->thresh_mult[THR_NEARESTA2] = 0;
rd->thresh_mult[THR_NEARESTA] = 0;
@@ -1576,7 +1617,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEWL2] += 1000;
rd->thresh_mult[THR_NEWL3] += 1000;
rd->thresh_mult[THR_NEWB] += 1000;
rd->thresh_mult[THR_NEWA2] = 1000;
rd->thresh_mult[THR_NEWA2] = 1100;
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
@@ -1588,18 +1629,18 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEARA] += 1000;
rd->thresh_mult[THR_NEARG] += 1000;
rd->thresh_mult[THR_GLOBALMV] += 2000;
rd->thresh_mult[THR_GLOBALMV] += 2200;
rd->thresh_mult[THR_GLOBALL2] += 2000;
rd->thresh_mult[THR_GLOBALL3] += 2000;
rd->thresh_mult[THR_GLOBALB] += 2000;
rd->thresh_mult[THR_GLOBALB] += 2400;
rd->thresh_mult[THR_GLOBALA2] = 2000;
rd->thresh_mult[THR_GLOBALG] += 2000;
rd->thresh_mult[THR_GLOBALA] += 2000;
rd->thresh_mult[THR_GLOBALA] += 2400;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1100;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 800;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 900;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
@@ -1617,17 +1658,17 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2500;
rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1530;
rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1870;
rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2400;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2750;
rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1870;
rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 1800;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] += 2500;
rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
@@ -1636,23 +1677,23 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 2500;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 3000;
rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1320;
rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 2040;
rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2500;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2250;
rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1360;
rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2500;
rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2400;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2250;
rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
@@ -1665,7 +1706,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700;
rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1870;
rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] += 2500;
@@ -1679,7 +1720,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] += 2500;
rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500;
rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1800;
rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700;
@@ -1694,7 +1735,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] += 2500;
rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1440;
rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500;
rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700;
@@ -1708,29 +1749,29 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2500;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2750;
rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1600;
rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 2000;
rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 2000;
rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2200;
rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2640;
rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2400;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] += 3200;
rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1600;
rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 2000;
rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 2000;
rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 1800;
rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2400;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] += 3200;
rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1600;
rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2000;
rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1760;
rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2400;
rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 2000;
rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2200;
rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 1760;
rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2640;
rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2400;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] += 3200;
@@ -1738,21 +1779,21 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 2000;
rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 2000;
rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEARBA] += 2200;
rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2400;
rd->thresh_mult[THR_COMP_NEW_NEARBA] += 1980;
rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2640;
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] += 3200;
rd->thresh_mult[THR_DC] += 1000;
rd->thresh_mult[THR_PAETH] += 1000;
rd->thresh_mult[THR_SMOOTH] += 2000;
rd->thresh_mult[THR_SMOOTH] += 2200;
rd->thresh_mult[THR_SMOOTH_V] += 2000;
rd->thresh_mult[THR_SMOOTH_H] += 2000;
rd->thresh_mult[THR_H_PRED] += 2000;
rd->thresh_mult[THR_V_PRED] += 2000;
rd->thresh_mult[THR_V_PRED] += 1800;
rd->thresh_mult[THR_D135_PRED] += 2500;
rd->thresh_mult[THR_D203_PRED] += 2500;
rd->thresh_mult[THR_D203_PRED] += 2000;
rd->thresh_mult[THR_D157_PRED] += 2500;
rd->thresh_mult[THR_D67_PRED] += 2500;
rd->thresh_mult[THR_D67_PRED] += 2000;
rd->thresh_mult[THR_D113_PRED] += 2500;
rd->thresh_mult[THR_D45_PRED] += 2500;
}
+3 -3
View File
@@ -656,10 +656,10 @@ void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
unsigned int qstep, int *rate, int64_t *dist);
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
double *distbysse_f);
void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
double *rate_f, double *distbysse_f);
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
double yl, double *rate_f, double *distbysse_f);
int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
const MACROBLOCKD *xd);
+391 -300
View File
File diff suppressed because it is too large Load Diff
-2
View File
@@ -151,10 +151,8 @@ typedef struct {
sobel_xy sobel(const uint8_t *input, int stride, int i, int j, bool high_bd);
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult);
#endif
#ifdef __cplusplus
} // extern "C"
+30 -52
View File
@@ -237,46 +237,19 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
MACROBLOCKD *xd, BLOCK_SIZE bsize,
int mi_row, int mi_col,
int plane_from, int plane_to) {
int plane;
static void build_inter_predictors_for_plane(const AV1_COMMON *cm,
MACROBLOCKD *xd, int mi_row,
int mi_col, const BUFFER_SET *ctx,
BLOCK_SIZE bsize, int plane_idx) {
const struct macroblockd_plane *pd = &xd->plane[plane_idx];
if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
pd->subsampling_y))
return;
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
for (plane = plane_from; plane <= plane_to; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = pd->width;
const int bh = pd->height;
if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
pd->subsampling_y))
continue;
build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y);
}
}
void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, 0);
}
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
for (int plane_idx = 1; plane_idx < MAX_MB_PLANE; plane_idx++) {
av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize,
plane_idx);
}
}
void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize, int plane_idx) {
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, plane_idx,
plane_idx);
build_inter_predictors(cm, xd, plane_idx, xd->mi[0], 0, pd->width, pd->height,
mi_x, mi_y);
if (is_interintra_pred(xd->mi[0])) {
BUFFER_SET default_ctx = { { NULL, NULL, NULL }, { 0, 0, 0 } };
@@ -291,13 +264,14 @@ void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
const int num_planes = av1_num_planes(cm);
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
if (num_planes > 1)
av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
// Builds the inter prediction for every plane index in the inclusive range
// [plane_from, plane_to] by handing each plane, in ascending order, to
// build_inter_predictors_for_plane(). Nothing is done when
// plane_from > plane_to.
void av1_enc_build_inter_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                   int mi_row, int mi_col,
                                   const BUFFER_SET *ctx, BLOCK_SIZE bsize,
                                   int plane_from, int plane_to) {
  int plane = plane_from;
  while (plane <= plane_to) {
    build_inter_predictors_for_plane(cm, xd, mi_row, mi_col, ctx, bsize,
                                     plane);
    ++plane;
  }
}
// TODO(sarahparker):
@@ -453,7 +427,7 @@ void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(xd)) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
dst_buf1[1] =
@@ -576,37 +550,41 @@ static void build_wedge_inter_predictor_from_buf(
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
mbmi->interinter_comp.seg_mask = xd->seg_mask;
const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
const int is_hbd = is_cur_buf_hbd(xd);
if (is_compound && is_masked_compound_type(comp_data->type)) {
if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (is_hbd) {
av1_build_compound_diffwtd_mask_highbd(
comp_data->seg_mask, comp_data->mask_type,
CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
else
} else {
av1_build_compound_diffwtd_mask(
comp_data->seg_mask, comp_data->mask_type, ext_dst0,
ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w);
}
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (is_hbd) {
build_masked_compound_highbd(
dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data,
mbmi->sb_type, h, w, xd->bd);
else
} else {
build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type,
h, w);
}
} else {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (is_hbd) {
aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
xd->bd);
else
} else {
aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
0, NULL, 0, w, h);
}
}
}
+4 -15
View File
@@ -23,21 +23,10 @@
extern "C" {
#endif
void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize, int plane_idx);
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
void av1_enc_build_inter_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
const BUFFER_SET *ctx, BLOCK_SIZE bsize,
int plane_from, int plane_to);
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, const MV *src_mv,
+34 -34
View File
@@ -80,7 +80,7 @@ static int frame_is_boosted(const AV1_COMP *cpi) {
// partly on the screen area over which they propagate. Propagation is limited
// by transform block size, but the screen area taken up by a given block size
// will be larger for a small image format stretched to full screen.
static BLOCK_SIZE set_partition_min_limit(AV1_COMMON *const cm) {
static BLOCK_SIZE set_partition_min_limit(const AV1_COMMON *const cm) {
unsigned int screen_area = (cm->width * cm->height);
// Select block size based on image format size.
@@ -103,10 +103,9 @@ static int has_internal_image_edge(const AV1_COMP *cpi) {
(cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
}
static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
AV1_COMMON *const cm = &cpi->common;
static void set_good_speed_feature_framesize_dependent(
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
const AV1_COMMON *const cm = &cpi->common;
const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
@@ -201,11 +200,12 @@ static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
}
}
static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
AV1_COMMON *const cm = &cpi->common;
static void set_good_speed_features_framesize_independent(
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
const AV1_COMMON *const cm = &cpi->common;
const int boosted = frame_is_boosted(cpi);
const int is_boosted_arf2_bwd_type =
boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame;
// Speed 0 for all speed features that give neutral coding performance change.
sf->reduce_inter_modes = 1;
@@ -213,6 +213,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->ml_prune_rect_partition = 1;
sf->ml_prune_ab_partition = 1;
sf->ml_prune_4_partition = 1;
sf->simple_motion_search_prune_rect = 1;
sf->adaptive_txb_search_level = 1;
sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
sf->model_based_prune_tx_search_level = 1;
@@ -222,8 +223,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
// TODO(debargha): Test, tweak and turn on either 1 or 2
sf->inter_mode_rd_model_estimation = 1;
sf->prune_ref_frame_for_rect_partitions =
!(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
sf->prune_ref_frame_for_rect_partitions = !is_boosted_arf2_bwd_type;
sf->prune_ref_mode_for_partitions = sf->prune_ref_frame_for_rect_partitions;
sf->less_rectangular_check_level = 1;
sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
@@ -270,7 +270,6 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->prune_single_motion_modes_by_simple_trans = 1;
sf->simple_motion_search_split_only = 1;
sf->simple_motion_search_prune_rect = 1;
sf->disable_wedge_search_var_thresh = 0;
sf->disable_wedge_search_edge_thresh = 0;
@@ -322,7 +321,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
// See aomedia:1778.
// sf->adaptive_motion_search = 1;
sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->use_transform_domain_distortion = 1;
sf->use_transform_domain_distortion = boosted ? 1 : 2;
sf->use_accurate_subpel_search = USE_2_TAPS;
sf->adaptive_rd_thresh = 2;
sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
@@ -333,24 +332,22 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
// TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
// it with cpi->sf.disable_wedge_search_var_thresh.
sf->disable_wedge_interintra_search = 1;
sf->perform_coeff_opt = boosted ? 0 : 3;
sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 3;
}
if (speed >= 4) {
sf->use_intra_txb_hash = 0;
sf->use_mb_rd_hash = 0;
sf->tx_type_search.fast_intra_tx_type_search = 1;
sf->use_square_partition_only_threshold =
boosted ? BLOCK_128X128 : BLOCK_4X4;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 1;
sf->cb_partition_search = !boosted;
sf->alt_ref_search_fp = 1;
sf->skip_sharp_interp_filter_search = 1;
sf->perform_coeff_opt = boosted ? 0 : 4;
sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
sf->adaptive_txb_search_level = boosted ? 2 : 3;
}
if (speed >= 5) {
@@ -400,6 +397,8 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->mv.search_method = FAST_HEX;
sf->partition_search_type = REFERENCE_PARTITION;
sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
// TODO(any): evaluate adaptive_mode_search=1 for speed 7 & 8
sf->adaptive_mode_search = 2;
}
if (speed >= 8) {
sf->mv.search_method = FAST_DIAMOND;
@@ -408,12 +407,12 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
}
}
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
SPEED_FEATURES *const sf = &cpi->sf;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
if (oxcf->mode == GOOD) {
set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
set_good_speed_feature_framesize_dependent(cpi, sf, speed);
}
if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
@@ -427,7 +426,7 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
}
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
AV1_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCK *const x = &cpi->td.mb;
@@ -576,7 +575,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->perform_coeff_opt = 0;
if (oxcf->mode == GOOD)
set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
set_good_speed_features_framesize_independent(cpi, sf, speed);
if (!cpi->seq_params_locked) {
cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;
@@ -591,28 +590,31 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
cpi->diamond_search_sad = av1_diamond_search_sad;
sf->allow_exhaustive_searches = 1;
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
sf->exhaustive_searches_thresh = (1 << 24);
else
sf->exhaustive_searches_thresh = (1 << 25);
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
if (speed > 0)
sf->max_exaustive_pct = good_quality_max_mesh_pct[mesh_speed];
if (mesh_speed > 0)
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_speed][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[speed][i].interval;
good_quality_mesh_patterns[mesh_speed][i].interval;
}
if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = intrabc_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].interval = intrabc_mesh_patterns[speed][i].interval;
sf->mesh_patterns[i].range = intrabc_mesh_patterns[mesh_speed][i].range;
sf->mesh_patterns[i].interval =
intrabc_mesh_patterns[mesh_speed][i].interval;
}
sf->max_exaustive_pct = intrabc_max_mesh_pct[speed];
sf->max_exaustive_pct = intrabc_max_mesh_pct[mesh_speed];
}
// Slow quant, dct and trellis not worthwhile for first pass
@@ -638,7 +640,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
cpi->optimize_speed_feature =
oxcf->pass != 1 ? sf->optimize_coefficients : NO_TRELLIS_OPT;
// FIXME: trellis not very efficient for quantisation matrices
if (cm->using_qmatrix) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
if (oxcf->using_qm) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
if (oxcf->disable_trellis_quant) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
x->min_partition_size = sf->default_min_partition_size;
@@ -653,9 +655,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
comp_type_rd_threshold_mul[sf->prune_comp_type_by_comp_avg];
cpi->max_comp_type_rd_threshold_div =
comp_type_rd_threshold_div[sf->prune_comp_type_by_comp_avg];
int tx_domain_speed = (oxcf->speed >= MAX_TX_DOMAIN_EVAL_SPEED)
? MAX_TX_DOMAIN_EVAL_SPEED
: oxcf->speed;
const int tx_domain_speed = AOMMIN(speed, MAX_TX_DOMAIN_EVAL_SPEED);
cpi->tx_domain_dist_threshold = tx_domain_dist_thresholds[tx_domain_speed];
// assert ensures that coeff_opt_dist_thresholds is accessed correctly
+4 -2
View File
@@ -656,8 +656,10 @@ typedef struct SPEED_FEATURES {
struct AV1_COMP;
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi);
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi);
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi,
int speed);
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi,
int speed);
#ifdef __cplusplus
} // extern "C"
+9 -7
View File
@@ -765,7 +765,8 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
// Save input state
uint8_t *input_buffer[MAX_MB_PLANE];
int i;
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
const int is_hbd = is_cur_buf_hbd(mbd);
if (is_hbd) {
predictor = CONVERT_TO_BYTEPTR(predictor16);
} else {
predictor = predictor8;
@@ -887,20 +888,21 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
const unsigned int w = plane ? mb_uv_width : BW;
const unsigned int h = plane ? mb_uv_height : BH;
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (is_hbd) {
highbd_apply_temporal_filter_self(pred, pred_stride, w, h,
blk_fw[0], accum, cnt);
else
} else {
apply_temporal_filter_self(pred, pred_stride, w, h, blk_fw[0],
accum, cnt);
}
pred += BLK_PELS;
accum += BLK_PELS;
cnt += BLK_PELS;
}
} else {
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
if (is_hbd) {
const int adj_strength = strength + 2 * (mbd->bd - 8);
if (num_planes <= 1) {
// Single plane case
@@ -943,7 +945,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
}
// Normalize filter output to produce AltRef frame
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_hbd) {
uint16_t *dst1_16;
uint16_t *dst2_16;
dst1 = cpi->alt_ref_buffer.y_buffer;
@@ -1139,7 +1141,7 @@ static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
struct lookahead_entry *buf = av1_lookahead_peek(cpi->lookahead, distance);
double noiselevel;
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (is_cur_buf_hbd(mbd)) {
noiselevel = highbd_estimate_noise(
buf->img.y_buffer, buf->img.y_crop_width, buf->img.y_crop_height,
buf->img.y_stride, mbd->bd, EDGE_THRESHOLD);
+595
View File
@@ -0,0 +1,595 @@
/*
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <stdint.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom/aom_codec.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconintra.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/reconinter_enc.h"
// One picture of the group analysed by the temporal dependency (TPL) model.
typedef struct GF_PICTURE {
// Frame buffer of this picture. init_gop_frames() leaves it NULL for entry 0
// when no golden reference buffer is available.
YV12_BUFFER_CONFIG *frame;
// Indices into the gf_picture[] array of the pictures this one may be
// predicted from; -1 marks an unavailable slot.
int ref_frame[7];
} GF_PICTURE;
// Quantizes and dequantizes the transform coefficients of one block and
// reports the resulting distortion.
//
//   x, plane     - macroblock and plane whose quantizer tables are used.
//   coeff        - input transform coefficients.
//   qcoeff       - receives the quantized coefficients.
//   dqcoeff      - receives the dequantized coefficients.
//   tx_size      - transform size; selects the scan order and coefficient
//                  count.
//   recon_error  - receives the coeff/dqcoeff block error, clamped to >= 1.
//   sse          - receives the value av1_block_error() writes through its
//                  last argument, clamped to >= 1.
//
// Both outputs are shifted right by 2 for every size except TX_32X32.
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               TX_SIZE tx_size, int64_t *recon_error,
                               int64_t *sse) {
  const struct macroblock_plane *const mb_plane = &x->plane[plane];
  const SCAN_ORDER *const order = &av1_default_scan_orders[tx_size];
  const int num_coeffs = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
  const int norm_shift = (tx_size == TX_32X32) ? 0 : 2;
  uint16_t eob;  // Required by the quantizer interface; not used afterwards.

  av1_quantize_fp_32x32(coeff, num_coeffs, mb_plane->zbin_QTX,
                        mb_plane->round_fp_QTX, mb_plane->quant_fp_QTX,
                        mb_plane->quant_shift_QTX, qcoeff, dqcoeff,
                        mb_plane->dequant_QTX, &eob, order->scan,
                        order->iscan);

  // Clamp to 1 so neither output is ever reported as zero.
  *recon_error =
      av1_block_error(coeff, dqcoeff, num_coeffs, sse) >> norm_shift;
  if (*recon_error < 1) *recon_error = 1;

  *sse = (*sse) >> norm_shift;
  if (*sse < 1) *sse = 1;
}
// Applies the Hadamard transform matching `tx_size` to the residual block
// `src_diff` (row stride `bw`) and writes the result to `coeff`.
// Only TX_8X8, TX_16X16 and TX_32X32 are handled; any other size trips the
// assert and leaves `coeff` untouched.
static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                         TX_SIZE tx_size) {
  if (tx_size == TX_8X8) {
    aom_hadamard_8x8(src_diff, bw, coeff);
  } else if (tx_size == TX_16X16) {
    aom_hadamard_16x16(src_diff, bw, coeff);
  } else if (tx_size == TX_32X32) {
    aom_hadamard_32x32(src_diff, bw, coeff);
  } else {
    assert(0);
  }
}
// Runs a full-pixel NSTEP search followed by the encoder's fractional-pel
// refinement for one block of the current frame against one reference frame.
//
//   cur_frame_buf / ref_frame_buf - luma pointers to the block in the source
//                                   and reference frames; both use `stride`.
//   bsize                         - block size being searched.
//   mi_row / mi_col               - block position in mode-info units.
//
// Returns the value produced by cpi->find_fractional_mv_step().
// NOTE(review): the resulting vector is not returned here; the caller reads
// x->best_mv afterwards, so the search routines presumably store it there -
// confirm against av1_full_pixel_search().
static uint32_t motion_compensated_prediction(AV1_COMP *cpi, ThreadData *td,
uint8_t *cur_frame_buf,
uint8_t *ref_frame_buf,
int stride, BLOCK_SIZE bsize,
int mi_row, int mi_col) {
AV1_COMMON *cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
const SEARCH_METHODS search_method = NSTEP;
int step_param;
int sadpb = x->sadperbit16;
uint32_t bestsme = UINT_MAX;
int distortion;
uint32_t sse;
int cost_list[5];
// Saved so the caller's MV limits can be restored after the full-pel search.
const MvLimits tmp_mv_limits = x->mv_limits;
// The search is always centred on the zero motion vector.
MV best_ref_mv1 = { 0, 0 };
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
best_ref_mv1_full.col = best_ref_mv1.col >> 3;
best_ref_mv1_full.row = best_ref_mv1.row >> 3;
// Setup frame pointers
x->plane[0].src.buf = cur_frame_buf;
x->plane[0].src.stride = stride;
xd->plane[0].pre[0].buf = ref_frame_buf;
xd->plane[0].pre[0].stride = stride;
// Start from the speed-feature step size, capped at MAX_MVSEARCH_STEPS - 2.
step_param = mv_sf->reduce_first_step_size;
step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);
av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
av1_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
search_method, 0, sadpb, cond_cost_list(cpi, cost_list),
&best_ref_mv1, INT_MAX, 0, (MI_SIZE * mi_col),
(MI_SIZE * mi_row), 0);
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
// Sub-pixel refinement; `distortion` and `sse` are filled in but not used by
// this function.
bestsme = cpi->find_fractional_mv_step(
x, cm, mi_row, mi_col, &best_ref_mv1, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, NULL,
0, 0, pw, ph, 1, 1);
return bestsme;
}
// Estimates the cheapest intra and inter prediction cost (SATD of the
// Hadamard-transformed residual) for one block and records the result in
// `tpl_stats`.
//
//   cpi, x, xd      - encoder, macroblock and macroblockd contexts; xd->cur_buf
//                     must already point at the frame being analysed.
//   sf              - scale factors passed to the inter predictor.
//   gf_picture      - picture table; gf_picture[frame_idx].ref_frame[] maps a
//                     reference slot to a picture index.
//   frame_idx       - index of the picture being analysed.
//   src_diff, coeff, qcoeff, dqcoeff - scratch buffers for the residual and
//                     its (de)quantized transform.
//   mi_row, mi_col  - block position in mode-info units.
//   bsize, tx_size  - block and transform size.
//   ref_frame       - 7 reference frame buffers; NULL entries are skipped.
//   predictor       - scratch prediction buffer with row stride `bw`.
//   recon_error, sse - written by get_quantize_error() each time a better
//                     inter candidate is found; left untouched when no
//                     reference frame is available.
//   tpl_stats       - zeroed, then filled with inter_cost, intra_cost,
//                     mc_dep_cost, ref_frame_index and mv.
//
// When every entry of `ref_frame` is NULL there is no inter candidate: the
// inter cost falls back to the intra cost, the motion vector stays zero and
// ref_frame_index is set to -1 (the same "unavailable" marker used in
// GF_PICTURE::ref_frame) rather than reading ref_frame[-1].
static void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *sse,
                            TplDepStats *tpl_stats) {
  AV1_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << mi_size_wide_log2[bsize];
  const int bh = 4 << mi_size_high_log2[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpFilters kernel =
      av1_make_interp_filters(EIGHTTAP_REGULAR, EIGHTTAP_REGULAR);

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  // Offset of the block's top-left luma sample inside a frame buffer.
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  MB_MODE_INFO mi_above, mi_left;

  memset(tpl_stats, 0, sizeof(*tpl_stats));

  // Distances to the frame edges, in 1/8 pixel units.
  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  // Only the presence of the neighbours matters here; the placeholders are
  // never initialised.
  xd->above_mbmi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mbmi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= PAETH_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    av1_predict_intra_block(
        cm, xd, block_size_wide[bsize], block_size_high[bsize], tx_size, mode,
        0, 0, FILTER_INTRA_MODES, src, src_stride, dst, dst_stride, 0, 0, 0);

    if (is_cur_buf_hbd(xd)) {
      aom_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
    } else {
      aom_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
    }

    wht_fwd_txfm(src_diff, bw, coeff, tx_size);

    intra_cost = aom_satd(coeff, pix_num);

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  // Motion estimation column boundary
  x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND));
  x->mv_limits.col_max =
      ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND);

  for (rf_idx = 0; rf_idx < 7; ++rf_idx) {
    if (ref_frame[rf_idx] == NULL) continue;

    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, mi_row, mi_col);

    // TODO(jingning): Not yet support high bit-depth in the next three
    // steps.
    ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
    WarpTypesAllowed warp_types;
    memset(&warp_types, 0, sizeof(WarpTypesAllowed));

    av1_build_inter_predictor(
        ref_frame[rf_idx]->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_stride,
        &predictor[0], bw, &x->best_mv.as_mv, sf, bw, bh, &conv_params, kernel,
        &warp_types, mi_col * MI_SIZE, mi_row * MI_SIZE, 0, 0, MV_PRECISION_Q3,
        mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
    if (is_cur_buf_hbd(xd)) {
      aom_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
    } else {
      aom_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
    }
    wht_fwd_txfm(src_diff, bw, coeff, tx_size);

    inter_cost = aom_satd(coeff, pix_num);
    if (inter_cost < best_inter_cost) {
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = x->best_mv.as_int;
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse);
    }
  }
  // Keep the intra cost at least 1, and never let inter cost exceed intra.
  best_intra_cost = AOMMAX(best_intra_cost, 1);
  best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost);
  tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
  tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
  tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow;

  // best_rf_idx is still -1 when no reference frame was available; indexing
  // ref_frame[] with it would read outside the array.
  tpl_stats->ref_frame_index =
      (best_rf_idx >= 0) ? gf_picture[frame_idx].ref_frame[best_rf_idx] : -1;
  tpl_stats->mv.as_int = best_mv.as_int;
}
// Returns floor(ref_pos / bsize_pix) for a positive `bsize_pix`, i.e. integer
// division that rounds towards negative infinity instead of towards zero.
static int round_floor(int ref_pos, int bsize_pix) {
  if (ref_pos >= 0) return ref_pos / bsize_pix;
  // Negative positions: -1..-bsize_pix map to -1, the next bsize_pix to -2...
  return -(1 + (-ref_pos - 1) / bsize_pix);
}
// Returns the overlap area, in pixels, between a bsize-sized block placed at
// (ref_pos_row, ref_pos_col) and one of the four grid-aligned blocks it can
// touch. `block` selects the grid block: bit 0 set means the right column,
// bit 1 set means the bottom row (0 = top-left ... 3 = bottom-right).
// Any other `block` value trips the assert and yields 0.
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                            int ref_pos_col, int block, BLOCK_SIZE bsize) {
  const int bw = 4 << mi_size_wide_log2[bsize];
  const int bh = 4 << mi_size_high_log2[bsize];

  if (block < 0 || block > 3) {
    assert(0);
    return 0;
  }

  const int in_right_col = block & 1;
  const int in_bottom_row = block >> 1;

  const int width = in_right_col ? ref_pos_col + bw - grid_pos_col
                                 : grid_pos_col + bw - ref_pos_col;
  const int height = in_bottom_row ? ref_pos_row + bh - grid_pos_row
                                   : grid_pos_row + bh - ref_pos_row;

  return width * height;
}
// Propagates the dependency cost of one block back to the reference frame it
// was predicted from.
//
//   tpl_frame  - base of the per-frame TPL statistics array; indexed with
//                tpl_stats->ref_frame_index.
//   tpl_stats  - statistics of the block being propagated (costs, reference
//                index and motion vector).
//   mi_row, mi_col - block position in mode-info units.
//   bsize      - block size.
//
// The motion-displaced block can straddle up to four grid-aligned blocks of
// the reference frame. Each one that lies inside the frame receives a share
// of mc_flow and mc_ref_cost weighted by its overlap area.
//
// A negative ref_frame_index means the block has no reference frame; nothing
// is propagated in that case, which also avoids indexing `tpl_frame` out of
// bounds.
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  if (tpl_stats->ref_frame_index < 0) return;

  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  MV mv = tpl_stats->mv.as_mv;
  // Motion vectors are stored in 1/8 pel; keep only the full-pel part.
  int mv_row = mv.row >> 3;
  int mv_col = mv.col >> 3;

  int ref_pos_row = mi_row * MI_SIZE + mv_row;
  int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << mi_size_wide_log2[bsize];
  const int bh = 4 << mi_size_high_log2[bsize];
  const int mi_height = mi_size_high[bsize];
  const int mi_width = mi_size_wide[bsize];
  const int pix_num = bw * bh;

  // top-left on grid block location in pixel
  int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  for (block = 0; block < 4; ++block) {
    int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
        grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
      int overlap_area = get_overlap_area(
          grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
      assert(overlap_area >= 0);
      int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
      int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

      // Part of the dependency cost that inter prediction passes on to the
      // reference: mc_dep_cost * (1 - inter_cost / intra_cost).
      int64_t mc_flow = tpl_stats->mc_dep_cost -
                        (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                            tpl_stats->intra_cost;

      int idx, idy;

      for (idy = 0; idy < mi_height; ++idy) {
        for (idx = 0; idx < mi_width; ++idx) {
          TplDepStats *des_stats =
              &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                         (ref_mi_col + idx)];

          des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
          des_stats->mc_ref_cost +=
              ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
              pix_num;
        }
      }
    }
  }
}
// Propagates the statistics of every mode-info unit covered by the
// bsize-sized block at (mi_row, mi_col) back to its reference frame, one
// BLOCK_4X4 unit at a time.
//
//   tpl_frame - base of the per-frame TPL statistics array; its first
//               element's stride is used to address `tpl_stats`.
//   tpl_stats - statistics grid of the frame being processed.
static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  const int row_end = mi_row + mi_size_high[bsize];
  const int col_end = mi_col + mi_size_wide[bsize];
  int row, col;

  for (row = mi_row; row < row_end; ++row) {
    for (col = mi_col; col < col_end; ++col) {
      TplDepStats *const unit_stats =
          &tpl_stats[row * tpl_frame->stride + col];
      tpl_model_update_b(tpl_frame, unit_stats, row, col, BLOCK_4X4);
    }
  }
}
// Spreads the block-level result `src_stats` over every mode-info unit of the
// bsize-sized block at (mi_row, mi_col) in the grid `tpl_stats` (row pitch
// `stride`).
//
// Intra and inter costs are divided evenly between the units and clamped to
// at least 1. mc_dep_cost is rebuilt from the per-unit intra cost plus the
// mc_flow already stored in each unit. The reference index and motion vector
// are copied unchanged.
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int stride,
                            const TplDepStats *src_stats) {
  const int rows = mi_size_high[bsize];
  const int cols = mi_size_wide[bsize];
  int64_t unit_intra_cost = src_stats->intra_cost / (rows * cols);
  int64_t unit_inter_cost = src_stats->inter_cost / (rows * cols);
  int r, c;

  if (unit_intra_cost < 1) unit_intra_cost = 1;
  if (unit_inter_cost < 1) unit_inter_cost = 1;

  for (r = 0; r < rows; ++r) {
    TplDepStats *const row_stats = &tpl_stats[(mi_row + r) * stride + mi_col];
    for (c = 0; c < cols; ++c) {
      TplDepStats *const unit = &row_stats[c];
      unit->intra_cost = unit_intra_cost;
      unit->inter_cost = unit_inter_cost;
      unit->mc_dep_cost = unit->intra_cost + unit->mc_flow;
      unit->ref_frame_index = src_stats->ref_frame_index;
      unit->mv.as_int = src_stats->mv.as_int;
    }
  }
}
// Runs the TPL analysis for one picture of the group: for every 32x32 block
// it estimates intra/inter costs, stores them in the picture's statistics
// grid and propagates the dependency cost to the reference pictures.
//
//   cpi        - encoder context; cpi->tpl_stats[frame_idx] receives the
//                results and is marked valid.
//   gf_picture - picture table built by init_gop_frames().
//   frame_idx  - index of the picture to analyse.
//
// Side effects visible here: overwrites xd->mi, xd->mi[0], xd->cur_buf,
// cm->base_qindex, x->mv_limits and the quantizer / motion-estimation
// constants of cpi->td.mb.
static void mc_flow_dispenser(AV1_COMP *cpi, GF_PICTURE *gf_picture,
int frame_idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
YV12_BUFFER_CONFIG *ref_frame[7] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
AV1_COMMON *cm = &cpi->common;
struct scale_factors sf;
int rdmult, idx;
ThreadData *td = &cpi->td;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
int mi_row, mi_col;
// Prediction scratch buffers; which one is used depends on is_cur_buf_hbd().
DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
uint8_t *predictor;
DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
// The analysis always works on 32x32 blocks with the matching transform.
const BLOCK_SIZE bsize = BLOCK_32X32;
const TX_SIZE tx_size = max_txsize_lookup[bsize];
const int mi_height = mi_size_high[bsize];
const int mi_width = mi_size_wide[bsize];
// Filled by mode_estimation(); not read again in this function.
int64_t recon_error, sse;
// Setup scaling factor
// Source and destination dimensions are identical, i.e. no scaling.
av1_setup_scale_factors_for_frame(
&sf, this_frame->y_crop_width, this_frame->y_crop_height,
this_frame->y_crop_width, this_frame->y_crop_height);
// NOTE(review): this check runs before xd->cur_buf is pointed at this_frame
// below, so it depends on whatever xd->cur_buf held on entry - confirm that
// is intended.
if (is_cur_buf_hbd(xd))
predictor = CONVERT_TO_BYTEPTR(predictor16);
else
predictor = predictor8;
// Prepare reference frame pointers. If any reference frame slot is
// unavailable, the pointer will be set to Null.
for (idx = 0; idx < 7; ++idx) {
int rf_idx = gf_picture[frame_idx].ref_frame[idx];
if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
}
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
xd->cur_buf = this_frame;
// Get rd multiplier set up.
rdmult = (int)av1_compute_rd_mult(cpi, tpl_frame->base_qindex);
if (rdmult < 1) rdmult = 1;
set_error_per_bit(&cpi->td.mb, rdmult);
av1_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
tpl_frame->is_valid = 1;
// The quantizer is initialised from the q index recorded for this picture.
cm->base_qindex = tpl_frame->base_qindex;
av1_frame_init_quantizer(cpi);
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
// Motion estimation row boundary
x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND));
x->mv_limits.row_max =
(cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * AOM_INTERP_EXTEND);
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
TplDepStats tpl_stats;
mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff,
qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size,
ref_frame, predictor, &recon_error, &sse, &tpl_stats);
// Motion flow dependency dispenser.
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
tpl_frame->stride, &tpl_stats);
tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
bsize);
}
}
}
// Builds the picture table used by the TPL model for the current golden
// frame group.
//
//   cpi              - encoder context; free frame buffers are (re)allocated
//                      and recorded in cpi->tpl_recon_frames.
//   gf_picture       - output table: entry 0 is the golden reference, entry 1
//                      the ARF (frame_input->source), entries 2.. are
//                      lookahead frames.
//   gf_group         - currently unused.
//   tpl_group_frames - receives the number of entries written to gf_picture.
//   frame_input      - provides the source frame used for the ARF entry.
//
// Each entry's ref_frame[0..2] holds the gf_picture indices of its golden,
// last and alt references (-1 when unavailable); slots 3..6 are always -1.
static void init_gop_frames(AV1_COMP *cpi, GF_PICTURE *gf_picture,
const GF_GROUP *gf_group, int *tpl_group_frames,
const EncodeFrameInput *const frame_input) {
AV1_COMMON *cm = &cpi->common;
const SequenceHeader *const seq_params = &cm->seq_params;
int frame_idx = 0;
int i;
int gld_index = -1;
int alt_index = -1;
int lst_index = -1;
int extend_frame_count = 0;
// Q index of the first lookahead entry; reused for the frames that extend
// past the current group.
int pframe_qindex = cpi->tpl_stats[2].base_qindex;
RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
int recon_frame_index[INTER_REFS_PER_FRAME + 1] = { -1, -1, -1, -1,
-1, -1, -1, -1 };
// TODO(jingning): To be used later for gf frame type parsing.
(void)gf_group;
// Collect up to INTER_REFS_PER_FRAME + 1 buffers with a zero reference count
// and size them for the current frame dimensions.
for (i = 0; i < FRAME_BUFFERS && frame_idx < INTER_REFS_PER_FRAME + 1; ++i) {
if (frame_bufs[i].ref_count == 0) {
alloc_frame_mvs(cm, &frame_bufs[i]);
if (aom_realloc_frame_buffer(
&frame_bufs[i].buf, cm->width, cm->height,
seq_params->subsampling_x, seq_params->subsampling_y,
seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
cm->byte_alignment, NULL, NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
recon_frame_index[frame_idx] = i;
++frame_idx;
}
}
// NOTE(review): the assert is the only protection against fewer free buffers
// than slots; with asserts disabled a remaining -1 would index frame_bufs[-1].
for (i = 0; i < INTER_REFS_PER_FRAME + 1; ++i) {
assert(recon_frame_index[i] >= 0);
cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
}
*tpl_group_frames = 0;
// Initialize Golden reference frame.
gf_picture[0].frame = NULL;
RefCntBuffer *ref_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
if (ref_buf) gf_picture[0].frame = &ref_buf->buf;
for (i = 0; i < 7; ++i) gf_picture[0].ref_frame[i] = -1;
gld_index = 0;
++*tpl_group_frames;
// Initialize ARF frame
gf_picture[1].frame = frame_input->source;
gf_picture[1].ref_frame[0] = gld_index;
gf_picture[1].ref_frame[1] = lst_index;
gf_picture[1].ref_frame[2] = alt_index;
// TODO(yuec) Need to figure out full AV1 reference model
for (i = 3; i < 7; ++i) gf_picture[1].ref_frame[i] = -1;
alt_index = 1;
++*tpl_group_frames;
// Initialize P frames
for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
// Entry frame_idx maps to lookahead position frame_idx - 2.
struct lookahead_entry *buf =
av1_lookahead_peek(cpi->lookahead, frame_idx - 2);
if (buf == NULL) break;
gf_picture[frame_idx].frame = &buf->img;
gf_picture[frame_idx].ref_frame[0] = gld_index;
gf_picture[frame_idx].ref_frame[1] = lst_index;
gf_picture[frame_idx].ref_frame[2] = alt_index;
for (i = 3; i < 7; ++i) gf_picture[frame_idx].ref_frame[i] = -1;
++*tpl_group_frames;
lst_index = frame_idx;
// Stop once the entry at the end of the baseline GF interval is added.
if (frame_idx == cpi->rc.baseline_gf_interval + 1) break;
}
// Frames beyond the group use the entry the loop stopped at as golden, the
// one before it as last, and have no alt reference.
gld_index = frame_idx;
lst_index = AOMMAX(0, frame_idx - 1);
alt_index = -1;
++frame_idx;
// Extend two frames outside the current gf group.
for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
struct lookahead_entry *buf =
av1_lookahead_peek(cpi->lookahead, frame_idx - 2);
if (buf == NULL) break;
cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
gf_picture[frame_idx].frame = &buf->img;
gf_picture[frame_idx].ref_frame[0] = gld_index;
gf_picture[frame_idx].ref_frame[1] = lst_index;
gf_picture[frame_idx].ref_frame[2] = alt_index;
for (i = 3; i < 7; ++i) gf_picture[frame_idx].ref_frame[i] = -1;
lst_index = frame_idx;
++*tpl_group_frames;
++extend_frame_count;
}
}
// Clears the statistics grid of all MAX_LAG_BUFFERS entries of
// cpi->tpl_stats and marks each entry as not valid.
static void init_tpl_stats(AV1_COMP *cpi) {
  int i = 0;
  while (i < MAX_LAG_BUFFERS) {
    TplDepFrame *const frame_stats = &cpi->tpl_stats[i];
    // Zero height * width elements of the grid.
    memset(frame_stats->tpl_stats_ptr, 0,
           frame_stats->height * frame_stats->width *
               sizeof(*frame_stats->tpl_stats_ptr));
    frame_stats->is_valid = 0;
    ++i;
  }
}
// Entry point of the TPL model: builds the picture table for the current
// golden frame group, resets the per-frame statistics, then analyses the
// pictures from the last table entry down to entry 1. Entry 0 (the golden
// reference) is never analysed itself.
void av1_tpl_setup_stats(AV1_COMP *cpi,
                         const EncodeFrameInput *const frame_input) {
  GF_PICTURE gf_picture[MAX_LAG_BUFFERS];
  int num_pictures = 0;

  init_gop_frames(cpi, gf_picture, &cpi->twopass.gf_group, &num_pictures,
                  frame_input);
  init_tpl_stats(cpi);

  // Backward propagation from the last picture to picture 1.
  int pic_idx = num_pictures;
  while (--pic_idx > 0) {
    mc_flow_dispenser(cpi, gf_picture, pic_idx);
  }
}
+26
View File
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_AV1_ENCODER_TPL_MODEL_H_
#define AOM_AV1_ENCODER_TPL_MODEL_H_
#ifdef __cplusplus
extern "C" {
#endif
// Builds the temporal dependency (TPL) statistics stored in cpi->tpl_stats
// for the current group of pictures. frame_input->source is used as the
// alt-ref picture of the group.
void av1_tpl_setup_stats(AV1_COMP *cpi,
const EncodeFrameInput *const frame_input);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AOM_AV1_ENCODER_TPL_MODEL_H_
+9 -777
View File
@@ -1407,6 +1407,13 @@ static INLINE void fadst16x16_new_avx2(const __m256i *input, __m256i *output,
output[14] = x1[15];
output[15] = x1[0];
}
// Multiplies the 16-bit lanes of `a` pairwise with (scale, rounding), sums
// each pair into a 32-bit lane and shifts the sum right arithmetically by
// NewSqrt2Bits, where rounding = 1 << (NewSqrt2Bits - 1).
// NOTE(review): for this to be a rounded scale, callers presumably interleave
// each sample with the constant 1 - confirm at the call sites.
static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
  const __m256i scale_and_rounding =
      pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1));
  return _mm256_srai_epi32(_mm256_madd_epi16(a, scale_and_rounding),
                           NewSqrt2Bits);
}
static INLINE void fidentity16x16_new_avx2(const __m256i *input,
__m256i *output, int8_t cos_bit) {
(void)cos_bit;
@@ -1990,781 +1997,6 @@ static void lowbd_fwd_txfm2d_64x16_avx2(const int16_t *input, int32_t *output,
}
}
// Butterfly on 16-bit lanes: interleaves in0/in1, forms two weighted sums per
// pair with the weight vectors w0 and w1, adds `__rounding`, shifts right by
// `cos_bit` and packs the results back to 16 bits with signed saturation.
//
//   out0 / out2 - low and high 128-bit halves of the w0 result.
//   out1 / out3 - low and high 128-bit halves of the w1 result.
void btf_16_avx2(__m256i w0, __m256i w1, __m256i in0, __m256i in1,
__m128i *out0, __m128i *out1, __m128i *out2, __m128i *out3,
__m256i __rounding, int8_t cos_bit) {
// Interleave the two inputs so each 32-bit lane holds one (in0, in1) pair.
__m256i t0 = _mm256_unpacklo_epi16(in0, in1);
__m256i t1 = _mm256_unpackhi_epi16(in0, in1);
// Multiply-add each pair with the weight pairs.
__m256i u0 = _mm256_madd_epi16(t0, w0);
__m256i u1 = _mm256_madd_epi16(t1, w0);
__m256i v0 = _mm256_madd_epi16(t0, w1);
__m256i v1 = _mm256_madd_epi16(t1, w1);
// Round ...
__m256i a0 = _mm256_add_epi32(u0, __rounding);
__m256i a1 = _mm256_add_epi32(u1, __rounding);
__m256i b0 = _mm256_add_epi32(v0, __rounding);
__m256i b1 = _mm256_add_epi32(v1, __rounding);
// ... and shift back down by cos_bit.
__m256i c0 = _mm256_srai_epi32(a0, cos_bit);
__m256i c1 = _mm256_srai_epi32(a1, cos_bit);
__m256i d0 = _mm256_srai_epi32(b0, cos_bit);
__m256i d1 = _mm256_srai_epi32(b1, cos_bit);
// Narrow to 16 bits with signed saturation.
__m256i temp0 = _mm256_packs_epi32(c0, c1);
__m256i temp1 = _mm256_packs_epi32(d0, d1);
// Split each 256-bit result into its low and high 128-bit halves.
*out0 = _mm256_castsi256_si128(temp0);
*out1 = _mm256_castsi256_si128(temp1);
*out2 = _mm256_extractf128_si256(temp0, 0x01);
*out3 = _mm256_extractf128_si256(temp1, 0x01);
}
// 8-point forward DCT applied to the 16-bit lanes of eight __m256i vectors.
//
//   input   - eight input vectors.
//   output  - eight output vectors, written in the permuted order of stage 5.
//   cos_bit - selects the cosine table (cospi_arr) and sets the rounding
//             shift of each rotation.
//
// Additions and subtractions use saturating 16-bit arithmetic.
// NOTE(review): btf_16_w16_avx2 receives pointers to its two operands and the
// code copies them afterwards, so it presumably rotates them in place -
// confirm against its definition.
static INLINE void fdct8x8_new_avx2(const __m256i *input, __m256i *output,
int8_t cos_bit) {
const int32_t *cospi = cospi_arr(cos_bit);
// Rounding constant: half of the final 1 << cos_bit scale.
const __m256i __rounding = _mm256_set1_epi32(1 << (cos_bit - 1));
// Cosine weight pairs used by the rotations below.
__m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
__m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
__m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
__m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
__m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
__m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
__m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
__m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
__m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
// stage 1
// Sums and differences of mirrored inputs (0/7, 1/6, 2/5, 3/4).
__m256i x1[8];
x1[0] = _mm256_adds_epi16(input[0], input[7]);
x1[7] = _mm256_subs_epi16(input[0], input[7]);
x1[1] = _mm256_adds_epi16(input[1], input[6]);
x1[6] = _mm256_subs_epi16(input[1], input[6]);
x1[2] = _mm256_adds_epi16(input[2], input[5]);
x1[5] = _mm256_subs_epi16(input[2], input[5]);
x1[3] = _mm256_adds_epi16(input[3], input[4]);
x1[4] = _mm256_subs_epi16(input[3], input[4]);
// stage 2
// Sums/differences on the first four values; rotation of x1[5], x1[6].
__m256i x2[8];
x2[0] = _mm256_adds_epi16(x1[0], x1[3]);
x2[3] = _mm256_subs_epi16(x1[0], x1[3]);
x2[1] = _mm256_adds_epi16(x1[1], x1[2]);
x2[2] = _mm256_subs_epi16(x1[1], x1[2]);
x2[4] = x1[4];
btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], __rounding,
cos_bit);
x2[5] = x1[5];
x2[6] = x1[6];
x2[7] = x1[7];
// stage 3
// Rotations on x2[0..3]; sums/differences on x2[4..7].
__m256i x3[8];
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x2[0], &x2[1], __rounding,
cos_bit);
x3[0] = x2[0];
x3[1] = x2[1];
btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x2[2], &x2[3], __rounding,
cos_bit);
x3[2] = x2[2];
x3[3] = x2[3];
x3[4] = _mm256_adds_epi16(x2[4], x2[5]);
x3[5] = _mm256_subs_epi16(x2[4], x2[5]);
x3[6] = _mm256_subs_epi16(x2[7], x2[6]);
x3[7] = _mm256_adds_epi16(x2[7], x2[6]);
// stage 4
// First four values pass through; rotations on (x3[4], x3[7]) and
// (x3[5], x3[6]).
__m256i x4[8];
x4[0] = x3[0];
x4[1] = x3[1];
x4[2] = x3[2];
x4[3] = x3[3];
btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x3[4], &x3[7], __rounding,
cos_bit);
x4[4] = x3[4];
x4[7] = x3[7];
btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x3[5], &x3[6], __rounding,
cos_bit);
x4[5] = x3[5];
x4[6] = x3[6];
// stage 5
// Reorder the stage-4 values into the output order.
output[0] = x4[0];
output[1] = x4[4];
output[2] = x4[2];
output[3] = x4[6];
output[4] = x4[1];
output[5] = x4[5];
output[6] = x4[3];
output[7] = x4[7];
}
// 8-point forward ADST stage network (per the function name) applied to eight
// __m256i vectors using saturating signed 16-bit lane arithmetic.
//   input   - eight source vectors; only read.
//   output  - eight result vectors, written in the permuted order of stage 7.
//   cos_bit - selects the cosine table returned by cospi_arr() and sets the
//             rounding offset 1 << (cos_bit - 1) handed to every butterfly.
static INLINE void fadst8x8_new_avx2(const __m256i *input, __m256i *output,
int8_t cos_bit) {
const int32_t *cospi = cospi_arr(cos_bit);
const __m256i __zero = _mm256_setzero_si256();
const __m256i __rounding = _mm256_set1_epi32(1 << (cos_bit - 1));
// Butterfly weights. Each constant is built from the two cosine-table entries
// named in its identifier (p = entry used as is, m = entry negated).
__m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
__m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
__m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
__m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
__m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
__m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
__m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
__m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
__m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
__m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
__m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
__m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
__m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
// stage 1
// Input permutation; half of the entries are negated as (0 - v) with
// saturation.
__m256i x1[8];
x1[0] = input[0];
x1[1] = _mm256_subs_epi16(__zero, input[7]);
x1[2] = _mm256_subs_epi16(__zero, input[3]);
x1[3] = input[4];
x1[4] = _mm256_subs_epi16(__zero, input[1]);
x1[5] = input[6];
x1[6] = input[2];
x1[7] = _mm256_subs_epi16(__zero, input[5]);
// stage 2
// btf_16_w16_avx2 receives its two operands by pointer; the values copied
// into x2[] right after each call are read from those same slots, so the
// helper presumably updates them in place (confirm against its definition).
__m256i x2[8];
x2[0] = x1[0];
x2[1] = x1[1];
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], __rounding,
cos_bit);
x2[2] = x1[2];
x2[3] = x1[3];
x2[4] = x1[4];
x2[5] = x1[5];
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], __rounding,
cos_bit);
x2[6] = x1[6];
x2[7] = x1[7];
// stage 3
// Saturating add/sub pairs: (0,2), (1,3), (4,6), (5,7).
__m256i x3[8];
x3[0] = _mm256_adds_epi16(x2[0], x2[2]);
x3[2] = _mm256_subs_epi16(x2[0], x2[2]);
x3[1] = _mm256_adds_epi16(x2[1], x2[3]);
x3[3] = _mm256_subs_epi16(x2[1], x2[3]);
x3[4] = _mm256_adds_epi16(x2[4], x2[6]);
x3[6] = _mm256_subs_epi16(x2[4], x2[6]);
x3[5] = _mm256_adds_epi16(x2[5], x2[7]);
x3[7] = _mm256_subs_epi16(x2[5], x2[7]);
// stage 4
// Entries 0..3 pass through; (4,5) and (6,7) go through butterflies.
__m256i x4[8];
x4[0] = x3[0];
x4[1] = x3[1];
x4[2] = x3[2];
x4[3] = x3[3];
btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x3[4], &x3[5], __rounding,
cos_bit);
x4[4] = x3[4];
x4[5] = x3[5];
btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x3[6], &x3[7], __rounding,
cos_bit);
x4[6] = x3[6];
x4[7] = x3[7];
// stage 5
// Saturating add/sub between entry i and entry i + 4.
__m256i x5[8];
x5[0] = _mm256_adds_epi16(x4[0], x4[4]);
x5[4] = _mm256_subs_epi16(x4[0], x4[4]);
x5[1] = _mm256_adds_epi16(x4[1], x4[5]);
x5[5] = _mm256_subs_epi16(x4[1], x4[5]);
x5[2] = _mm256_adds_epi16(x4[2], x4[6]);
x5[6] = _mm256_subs_epi16(x4[2], x4[6]);
x5[3] = _mm256_adds_epi16(x4[3], x4[7]);
x5[7] = _mm256_subs_epi16(x4[3], x4[7]);
// stage 6
// One butterfly per adjacent pair, each with its own weight pair.
__m256i x6[8];
btf_16_w16_avx2(cospi_p04_p60, cospi_p60_m04, &x5[0], &x5[1], __rounding,
cos_bit);
x6[0] = x5[0];
x6[1] = x5[1];
btf_16_w16_avx2(cospi_p20_p44, cospi_p44_m20, &x5[2], &x5[3], __rounding,
cos_bit);
x6[2] = x5[2];
x6[3] = x5[3];
btf_16_w16_avx2(cospi_p36_p28, cospi_p28_m36, &x5[4], &x5[5], __rounding,
cos_bit);
x6[4] = x5[4];
x6[5] = x5[5];
btf_16_w16_avx2(cospi_p52_p12, cospi_p12_m52, &x5[6], &x5[7], __rounding,
cos_bit);
x6[6] = x5[6];
x6[7] = x5[7];
// stage 7
// Output permutation only; no arithmetic.
output[0] = x6[1];
output[1] = x6[6];
output[2] = x6[3];
output[3] = x6[4];
output[4] = x6[5];
output[5] = x6[2];
output[6] = x6[7];
output[7] = x6[0];
}
// Identity kernel for eight __m256i vectors: each output vector is the
// matching input vector added to itself with signed 16-bit saturation, i.e.
// every lane is doubled (and clamped). cos_bit is accepted only so the
// signature matches the other 1-D kernels stored in the same dispatch tables.
static INLINE void fidentity8x8_new_avx2(const __m256i *input, __m256i *output,
                                         int8_t cos_bit) {
  (void)cos_bit;
  for (int row = 0; row < 8; ++row) {
    const __m256i v = input[row];
    output[row] = _mm256_adds_epi16(v, v);
  }
}
// 16-point forward DCT (per the function name) over sixteen __m128i rows.
// Two 128-bit rows are packed into each __m256i so that one 256-bit
// instruction processes two rows at a time.
//   input   - sixteen source rows; only read.
//   output  - sixteen result rows; every index 0..15 is written exactly once,
//             through the pointer arguments of the btf_16_avx2 calls.
//   cos_bit - selects the cosine table returned by cospi_arr() and the
//             rounding offset 1 << (cos_bit - 1).
static INLINE void fdct8x16_new_avx2(const __m128i *input, __m128i *output,
int8_t cos_bit) {
const int32_t *cospi = cospi_arr(cos_bit);
const __m256i __rounding_256 = _mm256_set1_epi32(1 << (cos_bit - 1));
// __rounding is not named in any call below; btf_16_sse2 is given temp0/temp1
// by name rather than by address, so it is presumably a macro that also picks
// up __rounding and cos_bit from this scope (confirm against its definition).
const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
__m128i temp0, temp1, temp2, temp3;
__m256i in0, in1;
// 128-bit weight pairs; the identifier names the two cosine-table entries and
// their signs (p = as is, m = negated).
__m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
__m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
__m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
__m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
__m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
__m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
__m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
__m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
__m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
__m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
__m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
__m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
__m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
__m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
__m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
__m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
__m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
__m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
// NOTE: from here on the local array `cospi_arr` shadows the cospi_arr()
// function called at the top of this function.
// Each entry holds one 128-bit weight pair in the low half and another in the
// high half, so the two halves of a 256-bit butterfly can use different
// weights (entries 0 and 1 use the same pair in both halves).
__m256i cospi_arr[12];
cospi_arr[0] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m32_p32),
cospi_m32_p32, 0x1);
cospi_arr[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
cospi_p32_p32, 0x1);
cospi_arr[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
cospi_p48_p16, 0x1);
cospi_arr[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
cospi_m16_p48, 0x1);
cospi_arr[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m16_p48),
cospi_m48_m16, 0x1);
cospi_arr[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_p16),
cospi_m16_p48, 0x1);
cospi_arr[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p56_p08),
cospi_p24_p40, 0x1);
cospi_arr[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m08_p56),
cospi_m40_p24, 0x1);
cospi_arr[8] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p60_p04),
cospi_p28_p36, 0x1);
cospi_arr[9] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m04_p60),
cospi_m36_p28, 0x1);
cospi_arr[10] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p44_p20),
cospi_p12_p52, 0x1);
cospi_arr[11] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m20_p44),
cospi_m52_p12, 0x1);
// Pack two input rows per register (low half, high half):
//   x[0]=(0,1)  x[1]=(15,14)  x[2]=(2,3)  x[3]=(13,12)
//   x[4]=(5,4)  x[5]=(10,11)  x[6]=(7,6)  x[7]=(8,9)
// Each even/odd register pair lines up rows i and 15 - i in the same half.
__m256i x[8];
x[0] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[0]), input[1], 0x1);
x[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[15]), input[14],
0x1);
x[2] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[2]), input[3], 0x1);
x[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[13]), input[12],
0x1);
x[4] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[5]), input[4], 0x1);
x[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[10]), input[11],
0x1);
x[6] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[7]), input[6], 0x1);
x[7] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[8]), input[9], 0x1);
// stage 1
// Saturating sums and differences of the paired rows.
// NOTE: x1 is declared with 16 slots but only indices 0..7 are used.
__m256i x1[16];
x1[0] = _mm256_adds_epi16(x[0], x[1]);
x1[7] = _mm256_subs_epi16(x[0], x[1]);
x1[1] = _mm256_adds_epi16(x[2], x[3]);
x1[6] = _mm256_subs_epi16(x[2], x[3]);
x1[2] = _mm256_adds_epi16(x[4], x[5]);
x1[5] = _mm256_subs_epi16(x[4], x[5]);
x1[3] = _mm256_adds_epi16(x[6], x[7]);
x1[4] = _mm256_subs_epi16(x[6], x[7]);
// stage 2
__m256i x2[8];
x2[0] = _mm256_adds_epi16(x1[0], x1[3]);
x2[7] = _mm256_subs_epi16(x1[0], x1[3]);
x2[1] = _mm256_adds_epi16(x1[1], x1[2]);
x2[6] = _mm256_subs_epi16(x1[1], x1[2]);
x2[2] = x1[4];
x2[3] = x1[7];
// btf_16_avx2 delivers four 128-bit results through pointers; they are
// re-paired into 256-bit registers below (note the temp2/temp0 and
// temp3/temp1 ordering).
btf_16_avx2(cospi_arr[0], cospi_arr[1], x1[5], x1[6], &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x2[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp0, 0x1);
x2[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp3), temp1, 0x1);
// stage 3
__m256i x3[8];
// 0x4e swaps the two 128-bit halves of x2[1] before the add/sub.
x2[1] = _mm256_permute4x64_epi64(x2[1], 0x4e);
x3[0] = _mm256_adds_epi16(x2[0], x2[1]);
x3[1] = _mm256_subs_epi16(x2[0], x2[1]);
// Low half taken from x2[7], high half from x2[6].
x3[2] = _mm256_blend_epi32(x2[7], x2[6], 0xf0);
// 128-bit butterfly on the remaining halves: low half of x2[6] and high half
// of x2[7].
btf_16_sse2(cospi_m32_p32, cospi_p32_p32, _mm256_castsi256_si128(x2[6]),
_mm256_extractf128_si256(x2[7], 0x01), temp0, temp1);
x3[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp1), temp0, 0x1);
x3[3] = _mm256_adds_epi16(x2[2], x2[4]);
x3[4] = _mm256_subs_epi16(x2[2], x2[4]);
x3[5] = _mm256_adds_epi16(x2[3], x2[5]);
x3[6] = _mm256_subs_epi16(x2[3], x2[5]);
// stage 4
// NOTE: only x4[2..7] are assigned; x4[0] and x4[1] are never used.
__m256i x4[8];
in0 = _mm256_blend_epi32(x3[0], x3[1], 0xf0);
in1 = _mm256_permute2f128_si256(x3[0], x3[1], 0x21);
// First outputs are final here: rows 0, 8, 4 and 12.
btf_16_avx2(cospi_arr[2], cospi_arr[3], in0, in1, &output[0], &output[8],
&output[4], &output[12], __rounding_256, cos_bit);
x4[2] = _mm256_adds_epi16(x3[2], x3[7]);
x4[3] = _mm256_subs_epi16(x3[2], x3[7]);
x4[4] = _mm256_permute2f128_si256(x3[3], x3[4], 0x20);
x4[5] = _mm256_permute2f128_si256(x3[6], x3[5], 0x20);
in0 = _mm256_permute2f128_si256(x3[3], x3[4], 0x31);
in1 = _mm256_permute2f128_si256(x3[5], x3[6], 0x31);
btf_16_avx2(cospi_arr[4], cospi_arr[5], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x4[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp2, 0x1);
x4[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp3), temp1, 0x1);
// stage 5
// NOTE: only x5[4..7] are assigned and used.
__m256i x5[8];
in0 = _mm256_permute2f128_si256(x4[2], x4[3], 0x31);
in1 = _mm256_permute2f128_si256(x4[2], x4[3], 0x20);
// Final rows 2, 14, 10 and 6.
btf_16_avx2(cospi_arr[6], cospi_arr[7], in0, in1, &output[2], &output[14],
&output[10], &output[6], __rounding_256, cos_bit);
x5[4] = _mm256_adds_epi16(x4[4], x4[6]);
x5[5] = _mm256_subs_epi16(x4[4], x4[6]);
x5[6] = _mm256_adds_epi16(x4[5], x4[7]);
x5[7] = _mm256_subs_epi16(x4[5], x4[7]);
// stage 6
// Final odd rows: 1, 15, 9, 7 and then 5, 11, 13, 3.
in0 = _mm256_permute2f128_si256(x5[4], x5[5], 0x20);
in1 = _mm256_permute2f128_si256(x5[6], x5[7], 0x31);
btf_16_avx2(cospi_arr[8], cospi_arr[9], in0, in1, &output[1], &output[15],
&output[9], &output[7], __rounding_256, cos_bit);
in0 = _mm256_permute2f128_si256(x5[5], x5[4], 0x31);
in1 = _mm256_permute2f128_si256(x5[7], x5[6], 0x20);
btf_16_avx2(cospi_arr[10], cospi_arr[11], in0, in1, &output[5], &output[11],
&output[13], &output[3], __rounding_256, cos_bit);
}
// 16-point forward ADST (per the function name) over sixteen __m128i rows.
// Two 128-bit rows are packed into each __m256i so that one 256-bit
// instruction processes two rows at a time.
//   input   - sixteen source rows; only read.
//   output  - sixteen result rows; every index 0..15 is written exactly once,
//             through the pointer arguments of the stage 8 btf_16_avx2 calls.
//   cos_bit - selects the cosine table returned by cospi_arr() and the
//             rounding offset 1 << (cos_bit - 1).
static INLINE void fadst8x16_new_avx2(const __m128i *input, __m128i *output,
int8_t cos_bit) {
const int32_t *cospi = cospi_arr(cos_bit);
const __m256i __zero = _mm256_setzero_si256();
const __m256i __rounding_256 = _mm256_set1_epi32(1 << (cos_bit - 1));
__m256i in0, in1;
__m128i temp0, temp1, temp2, temp3;
// 128-bit weight pairs; the identifier names the two cosine-table entries and
// their signs (p = as is, m = negated).
__m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
__m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
__m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
__m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
__m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
__m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
__m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
__m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
__m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
__m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
__m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
__m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
__m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
__m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
__m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
__m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
__m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
__m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
__m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
__m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
__m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
__m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
__m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
__m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
__m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
__m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
__m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
// NOTE: from here on the local array `cospi_arr` shadows the cospi_arr()
// function called at the top of this function.
// Each entry holds one 128-bit weight pair in the low half and one in the
// high half. Entries 2/3 repeat entries 0/1, entries 6/7 repeat entries 4/5,
// and entry 11 repeats entry 8; every butterfly call below gets its own pair
// of slots regardless.
__m256i cospi_arr[20];
cospi_arr[0] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
cospi_p32_p32, 0x1);
cospi_arr[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
cospi_p32_m32, 0x1);
cospi_arr[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
cospi_p32_p32, 0x1);
cospi_arr[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
cospi_p32_m32, 0x1);
cospi_arr[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p16_p48),
cospi_m48_p16, 0x1);
cospi_arr[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_m16),
cospi_p16_p48, 0x1);
cospi_arr[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p16_p48),
cospi_m48_p16, 0x1);
cospi_arr[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_m16),
cospi_p16_p48, 0x1);
cospi_arr[8] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p08_p56),
cospi_p40_p24, 0x1);
cospi_arr[9] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p56_m08),
cospi_p24_m40, 0x1);
cospi_arr[10] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m56_p08),
cospi_m24_p40, 0x1);
cospi_arr[11] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p08_p56),
cospi_p40_p24, 0x1);
cospi_arr[12] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p02_p62),
cospi_p10_p54, 0x1);
cospi_arr[13] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p62_m02),
cospi_p54_m10, 0x1);
cospi_arr[14] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p18_p46),
cospi_p26_p38, 0x1);
cospi_arr[15] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p46_m18),
cospi_p38_m26, 0x1);
cospi_arr[16] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p34_p30),
cospi_p42_p22, 0x1);
cospi_arr[17] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p30_m34),
cospi_p22_m42, 0x1);
cospi_arr[18] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p50_p14),
cospi_p58_p06, 0x1);
cospi_arr[19] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p14_m50),
cospi_p06_m58, 0x1);
// Pack two input rows per register (low half, high half):
//   x[0]=(0,4)  x[1]=(2,6)   x[2]=(8,12)  x[3]=(10,14)
//   x[4]=(1,9)  x[5]=(3,11)  x[6]=(5,13)  x[7]=(7,15)
__m256i x[8];
x[0] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[0]), input[4], 0x1);
x[1] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[2]), input[6], 0x1);
x[2] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[8]), input[12], 0x1);
x[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[10]), input[14],
0x1);
x[4] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[1]), input[9], 0x1);
x[5] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[3]), input[11], 0x1);
x[6] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[5]), input[13], 0x1);
x[7] =
_mm256_insertf128_si256(_mm256_castsi128_si256(input[7]), input[15], 0x1);
// stage 1
// Register permutation; the registers holding odd-numbered rows (x[4..7])
// are negated as (0 - v) with saturation.
__m256i x1[8];
x1[0] = x[0];
x1[1] = _mm256_subs_epi16(__zero, x[7]);
x1[2] = x[2];
x1[3] = _mm256_subs_epi16(__zero, x[5]);
x1[4] = _mm256_subs_epi16(__zero, x[4]);
x1[5] = x[3];
x1[6] = _mm256_subs_epi16(__zero, x[6]);
x1[7] = x[1];
// stage 2
// Blends with mask 0xf0 take the low 128 bits from the first operand and the
// high 128 bits from the second. x2[0], x2[3], x2[4], x2[7] are pure
// regroupings; x2[1], x2[2], x2[5], x2[6] come from butterflies.
__m256i x2[8];
x2[0] = _mm256_blend_epi32(x1[0], x1[1], 0xf0);
x2[3] = _mm256_blend_epi32(x1[3], x1[2], 0xf0);
x2[4] = _mm256_blend_epi32(x1[4], x1[5], 0xf0);
x2[7] = _mm256_blend_epi32(x1[7], x1[6], 0xf0);
in0 = _mm256_blend_epi32(x1[1], x1[0], 0xf0);
in1 = _mm256_blend_epi32(x1[2], x1[3], 0xf0);
// btf_16_avx2 delivers four 128-bit results through pointers; they are
// re-paired as (temp0,temp1) and (temp2,temp3) after every call in stages
// 2 to 6.
btf_16_avx2(cospi_arr[0], cospi_arr[1], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x2[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x2[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
in0 = _mm256_permute2f128_si256(x1[7], x1[6], 0x21);
in1 = _mm256_permute2f128_si256(x1[4], x1[5], 0x21);
btf_16_avx2(cospi_arr[2], cospi_arr[3], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x2[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x2[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
// stage 3
// Saturating add/sub on register pairs (0,1), (3,2), (4,5), (7,6).
__m256i x3[8];
x3[0] = _mm256_adds_epi16(x2[0], x2[1]);
x3[1] = _mm256_subs_epi16(x2[0], x2[1]);
x3[2] = _mm256_adds_epi16(x2[3], x2[2]);
x3[3] = _mm256_subs_epi16(x2[3], x2[2]);
x3[4] = _mm256_adds_epi16(x2[4], x2[5]);
x3[5] = _mm256_subs_epi16(x2[4], x2[5]);
x3[6] = _mm256_adds_epi16(x2[7], x2[6]);
x3[7] = _mm256_subs_epi16(x2[7], x2[6]);
// stage 4
// Registers 0, 1, 4, 5 pass through; (2,3) and (6,7) go through butterflies.
// In permute2f128, 0x20 gathers the two low halves and 0x31 the two high
// halves of its operands.
__m256i x4[8];
x4[0] = x3[0];
x4[1] = x3[1];
x4[4] = x3[4];
x4[5] = x3[5];
in0 = _mm256_permute2f128_si256(x3[2], x3[3], 0x20);
in1 = _mm256_permute2f128_si256(x3[2], x3[3], 0x31);
btf_16_avx2(cospi_arr[4], cospi_arr[5], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x4[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x4[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
in0 = _mm256_permute2f128_si256(x3[6], x3[7], 0x20);
in1 = _mm256_permute2f128_si256(x3[6], x3[7], 0x31);
btf_16_avx2(cospi_arr[6], cospi_arr[7], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x4[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x4[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
// stage 5
// Saturating add/sub on register pairs (0,2), (1,3), (4,6), (5,7).
__m256i x5[8];
x5[0] = _mm256_adds_epi16(x4[0], x4[2]);
x5[1] = _mm256_subs_epi16(x4[0], x4[2]);
x5[2] = _mm256_adds_epi16(x4[1], x4[3]);
x5[3] = _mm256_subs_epi16(x4[1], x4[3]);
x5[4] = _mm256_adds_epi16(x4[4], x4[6]);
x5[5] = _mm256_subs_epi16(x4[4], x4[6]);
x5[6] = _mm256_adds_epi16(x4[5], x4[7]);
x5[7] = _mm256_subs_epi16(x4[5], x4[7]);
// stage 6
// Note the swap: x6[1] takes x5[2] and x6[2] takes x5[1].
__m256i x6[8];
x6[0] = x5[0];
x6[1] = x5[2];
x6[2] = x5[1];
x6[3] = x5[3];
in0 = _mm256_permute2f128_si256(x5[4], x5[6], 0x20);
in1 = _mm256_permute2f128_si256(x5[4], x5[6], 0x31);
btf_16_avx2(cospi_arr[8], cospi_arr[9], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x6[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x6[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
in0 = _mm256_permute2f128_si256(x5[5], x5[7], 0x20);
in1 = _mm256_permute2f128_si256(x5[5], x5[7], 0x31);
btf_16_avx2(cospi_arr[10], cospi_arr[11], in0, in1, &temp0, &temp1, &temp2,
&temp3, __rounding_256, cos_bit);
x6[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
x6[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
// stage 7
// Saturating add/sub between register i and register i + 4.
__m256i x7[8];
x7[0] = _mm256_adds_epi16(x6[0], x6[4]);
x7[1] = _mm256_subs_epi16(x6[0], x6[4]);
x7[2] = _mm256_adds_epi16(x6[1], x6[5]);
x7[3] = _mm256_subs_epi16(x6[1], x6[5]);
x7[4] = _mm256_adds_epi16(x6[2], x6[6]);
x7[5] = _mm256_subs_epi16(x6[2], x6[6]);
x7[6] = _mm256_adds_epi16(x6[3], x6[7]);
x7[7] = _mm256_subs_epi16(x6[3], x6[7]);
// stage 8
// Final butterflies write straight into output[]; the four calls together
// cover every row index 0..15 exactly once.
in0 = _mm256_permute2f128_si256(x7[0], x7[2], 0x20);
in1 = _mm256_permute2f128_si256(x7[0], x7[2], 0x31);
btf_16_avx2(cospi_arr[12], cospi_arr[13], in0, in1, &output[15], &output[0],
&output[13], &output[2], __rounding_256, cos_bit);
in0 = _mm256_permute2f128_si256(x7[4], x7[6], 0x20);
in1 = _mm256_permute2f128_si256(x7[4], x7[6], 0x31);
btf_16_avx2(cospi_arr[14], cospi_arr[15], in0, in1, &output[11], &output[4],
&output[9], &output[6], __rounding_256, cos_bit);
in0 = _mm256_permute2f128_si256(x7[1], x7[3], 0x20);
in1 = _mm256_permute2f128_si256(x7[1], x7[3], 0x31);
btf_16_avx2(cospi_arr[16], cospi_arr[17], in0, in1, &output[7], &output[8],
&output[5], &output[10], __rounding_256, cos_bit);
in0 = _mm256_permute2f128_si256(x7[5], x7[7], 0x20);
in1 = _mm256_permute2f128_si256(x7[5], x7[7], 0x31);
btf_16_avx2(cospi_arr[18], cospi_arr[19], in0, in1, &output[3], &output[12],
&output[1], &output[14], __rounding_256, cos_bit);
}
// Identity kernel for sixteen __m128i rows. Rows are handled two at a time:
// a pair is glued into one __m256i, each 16-bit lane is scaled by
// 2 * NewSqrt2 through scale_round_avx2(), and the 32-bit results are packed
// back to 16 bits with signed saturation before being split into two rows.
// cos_bit is accepted only so the signature matches the other 1-D kernels
// stored in the same dispatch tables.
static INLINE void fidentity8x16_new_avx2(const __m128i *input, __m128i *output,
                                          int8_t cos_bit) {
  (void)cos_bit;
  const __m256i ones = _mm256_set1_epi16(1);
  for (int row = 0; row < 16; row += 2) {
    // Low half = row, high half = row + 1.
    const __m256i pair = _mm256_insertf128_si256(
        _mm256_castsi128_si256(input[row]), input[row + 1], 0x1);
    // Interleave every sample with the constant 1 to form 32-bit elements;
    // presumably this lets scale_round_avx2 fold its rounding term into one
    // multiply-add (confirm against its definition).
    const __m256i scaled_lo =
        scale_round_avx2(_mm256_unpacklo_epi16(pair, ones), 2 * NewSqrt2);
    const __m256i scaled_hi =
        scale_round_avx2(_mm256_unpackhi_epi16(pair, ones), 2 * NewSqrt2);
    const __m256i packed = _mm256_packs_epi32(scaled_lo, scaled_hi);
    output[row] = _mm256_castsi256_si128(packed);
    output[row + 1] = _mm256_extractf128_si256(packed, 0x1);
  }
}
// Row-pass 1-D kernels for the 8x16 forward transform, indexed by TX_TYPE.
// lowbd_fwd_txfm2d_8x16_avx2 looks up its row_txfm here. All entries are the
// 8-point __m256i kernels; the trailing comment on each line names the
// TX_TYPE that the slot corresponds to.
static const transform_1d_avx2 row_txfm8x16_arr[TX_TYPES] = {
fdct8x8_new_avx2, // DCT_DCT
fdct8x8_new_avx2, // ADST_DCT
fadst8x8_new_avx2, // DCT_ADST
fadst8x8_new_avx2, // ADST_ADST
fdct8x8_new_avx2, // FLIPADST_DCT
fadst8x8_new_avx2, // DCT_FLIPADST
fadst8x8_new_avx2, // FLIPADST_FLIPADST
fadst8x8_new_avx2, // ADST_FLIPADST
fadst8x8_new_avx2, // FLIPADST_ADST
fidentity8x8_new_avx2, // IDTX
fidentity8x8_new_avx2, // V_DCT
fdct8x8_new_avx2, // H_DCT
fidentity8x8_new_avx2, // V_ADST
fadst8x8_new_avx2, // H_ADST
fidentity8x8_new_avx2, // V_FLIPADST
fadst8x8_new_avx2 // H_FLIPADST
};
// Column-pass 1-D kernels for the 8x16 forward transform, indexed by TX_TYPE.
// lowbd_fwd_txfm2d_8x16_avx2 looks up its col_txfm here. All entries are the
// 16-point kernels that take and return __m128i rows; the trailing comment on
// each line names the TX_TYPE that the slot corresponds to.
static const transform_1d_sse2 col_txfm8x16_arr[TX_TYPES] = {
fdct8x16_new_avx2, // DCT_DCT
fadst8x16_new_avx2, // ADST_DCT
fdct8x16_new_avx2, // DCT_ADST
fadst8x16_new_avx2, // ADST_ADST
fadst8x16_new_avx2, // FLIPADST_DCT
fdct8x16_new_avx2, // DCT_FLIPADST
fadst8x16_new_avx2, // FLIPADST_FLIPADST
fadst8x16_new_avx2, // ADST_FLIPADST
fadst8x16_new_avx2, // FLIPADST_ADST
fidentity8x16_new_avx2, // IDTX
fdct8x16_new_avx2, // V_DCT
fidentity8x16_new_avx2, // H_DCT
fadst8x16_new_avx2, // V_ADST
fidentity8x16_new_avx2, // H_ADST
fadst8x16_new_avx2, // V_FLIPADST
fidentity8x16_new_avx2 // H_FLIPADST
};
// Column-pass 1-D kernels for the 16x8 forward transform, indexed by TX_TYPE.
// lowbd_fwd_txfm2d_16x8_avx2 looks up its col_txfm here. All entries are the
// 8-point __m256i kernels; the trailing comment on each line names the
// TX_TYPE that the slot corresponds to.
static const transform_1d_avx2 col_txfm16x8_arr[TX_TYPES] = {
fdct8x8_new_avx2, // DCT_DCT
fadst8x8_new_avx2, // ADST_DCT
fdct8x8_new_avx2, // DCT_ADST
fadst8x8_new_avx2, // ADST_ADST
fadst8x8_new_avx2, // FLIPADST_DCT
fdct8x8_new_avx2, // DCT_FLIPADST
fadst8x8_new_avx2, // FLIPADST_FLIPADST
fadst8x8_new_avx2, // ADST_FLIPADST
fadst8x8_new_avx2, // FLIPADST_ADST
fidentity8x8_new_avx2, // IDTX
fdct8x8_new_avx2, // V_DCT
fidentity8x8_new_avx2, // H_DCT
fadst8x8_new_avx2, // V_ADST
fidentity8x8_new_avx2, // H_ADST
fadst8x8_new_avx2, // V_FLIPADST
fidentity8x8_new_avx2, // H_FLIPADST
};
// Row-pass 1-D kernels for the 16x8 forward transform, indexed by TX_TYPE.
// lowbd_fwd_txfm2d_16x8_avx2 looks up its row_txfm here. All entries are the
// 16-point kernels that take and return __m128i rows; the trailing comment on
// each line names the TX_TYPE that the slot corresponds to.
static const transform_1d_sse2 row_txfm16x8_arr[TX_TYPES] = {
fdct8x16_new_avx2, // DCT_DCT
fdct8x16_new_avx2, // ADST_DCT
fadst8x16_new_avx2, // DCT_ADST
fadst8x16_new_avx2, // ADST_ADST
fdct8x16_new_avx2, // FLIPADST_DCT
fadst8x16_new_avx2, // DCT_FLIPADST
fadst8x16_new_avx2, // FLIPADST_FLIPADST
fadst8x16_new_avx2, // ADST_FLIPADST
fadst8x16_new_avx2, // FLIPADST_ADST
fidentity8x16_new_avx2, // IDTX
fidentity8x16_new_avx2, // V_DCT
fdct8x16_new_avx2, // H_DCT
fidentity8x16_new_avx2, // V_ADST
fadst8x16_new_avx2, // H_ADST
fidentity8x16_new_avx2, // V_FLIPADST
fadst8x16_new_avx2 // H_FLIPADST
};
// 2-D forward transform for an 8-wide, 16-tall block of 16-bit samples.
//   input   - source samples, `stride` elements between rows.
//   output  - destination for the 32-bit coefficients.
//   tx_type - selects the column/row kernels and the flip configuration.
//   bd      - unused.
// Pipeline: load (optionally flipped vertically) -> shift[0] -> column kernel
// -> shift[1] -> transpose both 8x8 halves -> optional left/right flip ->
// pack into 256-bit rows -> row kernel -> shift[2] -> transpose -> store.
void lowbd_fwd_txfm2d_8x16_avx2(const int16_t *input, int32_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  (void)bd;
  const int num_cols = 8;
  const int num_rows = 16;
  const int8_t *const stage_shift = fwd_txfm_shift_ls[TX_8X16];
  const int w_idx = get_txw_idx(TX_8X16);
  const int h_idx = get_txh_idx(TX_8X16);
  const int col_cos_bit = fwd_cos_bit_col[w_idx][h_idx];
  const int row_cos_bit = fwd_cos_bit_row[w_idx][h_idx];
  const transform_1d_sse2 col_kernel = col_txfm8x16_arr[tx_type];
  const transform_1d_avx2 row_kernel = row_txfm8x16_arr[tx_type];

  __m128i col_buf[16], tr_buf[16];
  __m256i row_buf[8];

  int flip_ud, flip_lr;
  get_flip_cfg(tx_type, &flip_ud, &flip_lr);

  // Column pass on sixteen 128-bit rows.
  if (!flip_ud) {
    load_buffer_16bit_to_16bit(input, stride, col_buf, num_rows);
  } else {
    load_buffer_16bit_to_16bit_flip(input, stride, col_buf, num_rows);
  }
  round_shift_16bit(col_buf, num_rows, stage_shift[0]);
  col_kernel(col_buf, col_buf, col_cos_bit);
  round_shift_16bit(col_buf, num_rows, stage_shift[1]);

  // Transpose the upper and lower 8x8 halves independently.
  transpose_16bit_8x8(col_buf, tr_buf);
  transpose_16bit_8x8(col_buf + 8, tr_buf + 8);

  // Without a horizontal flip the transposed data is used directly; with one,
  // each half is flipped into col_buf, which is free for reuse at this point.
  __m128i *lower_half = tr_buf;
  __m128i *upper_half = tr_buf + num_cols;
  if (flip_lr) {
    lower_half = col_buf;
    upper_half = col_buf + 8;
    flip_buf_sse2(tr_buf, lower_half, num_cols);
    flip_buf_sse2(tr_buf + num_cols, upper_half, num_cols);
  }

  // Row pass on eight 256-bit rows.
  pack_reg(lower_half, upper_half, row_buf);
  row_kernel(row_buf, row_buf, row_cos_bit);
  round_shift_16bit_w16_avx2(row_buf, num_cols, stage_shift[2]);
  transpose_16bit_16x8_avx2(row_buf, row_buf);
  store_rect_buffer_16bit_to_32bit_w8_avx2(row_buf, output, num_cols, 8);
}
// 2-D forward transform for a 16-wide, 8-tall block of 16-bit samples.
//   input   - source samples, `stride` elements between rows.
//   output  - destination for the 32-bit coefficients.
//   tx_type - selects the column/row kernels and the flip configuration.
//   bd      - unused.
// Pipeline: load left and right 8-column halves (optionally flipped
// vertically) -> pack into 256-bit rows -> shift[0] -> column kernel ->
// shift[1] -> transpose -> unpack to 128-bit rows -> optional left/right flip
// -> row kernel -> shift[2] -> transpose and store each 8x8 half.
void lowbd_fwd_txfm2d_16x8_avx2(const int16_t *input, int32_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  (void)bd;
  const int num_cols = 16;
  const int num_rows = 8;
  const int8_t *const stage_shift = fwd_txfm_shift_ls[TX_16X8];
  const int w_idx = get_txw_idx(TX_16X8);
  const int h_idx = get_txh_idx(TX_16X8);
  const int col_cos_bit = fwd_cos_bit_col[w_idx][h_idx];
  const int row_cos_bit = fwd_cos_bit_row[w_idx][h_idx];
  const transform_1d_avx2 col_kernel = col_txfm16x8_arr[tx_type];
  const transform_1d_sse2 row_kernel = row_txfm16x8_arr[tx_type];

  __m128i load_buf[16], tr_buf[16];
  __m256i wide_buf[8];

  int flip_ud, flip_lr;
  get_flip_cfg(tx_type, &flip_ud, &flip_lr);

  // Left 8 columns go to load_buf[0..7], right 8 columns to load_buf[8..15].
  if (!flip_ud) {
    load_buffer_16bit_to_16bit(input, stride, load_buf, num_rows);
    load_buffer_16bit_to_16bit(input + 8, stride, &load_buf[8], num_rows);
  } else {
    load_buffer_16bit_to_16bit_flip(input, stride, load_buf, num_rows);
    load_buffer_16bit_to_16bit_flip(input + 8, stride, &load_buf[8], num_rows);
  }

  // Column pass on eight 256-bit rows.
  pack_reg(load_buf, &load_buf[8], wide_buf);
  round_shift_16bit_w16_avx2(wide_buf, num_rows, stage_shift[0]);
  col_kernel(wide_buf, wide_buf, col_cos_bit);
  round_shift_16bit_w16_avx2(wide_buf, num_rows, stage_shift[1]);
  transpose_16bit_16x8_avx2(wide_buf, wide_buf);
  extract_reg(wide_buf, tr_buf);

  // Without a horizontal flip the transposed rows are used directly; with
  // one, they are flipped into load_buf, which is free for reuse by now.
  __m128i *rows = tr_buf;
  if (flip_lr) {
    rows = load_buf;
    flip_buf_sse2(tr_buf, rows, num_cols);
  }

  // Row pass on sixteen 128-bit rows, then emit the two 8x8 halves.
  row_kernel(rows, rows, row_cos_bit);
  round_shift_16bit(rows, num_cols, stage_shift[2]);
  transpose_16bit_8x8(rows, rows);
  store_rect_buffer_16bit_to_32bit_w8(rows, output, num_cols, num_rows);
  transpose_16bit_8x8(rows + 8, rows + 8);
  store_rect_buffer_16bit_to_32bit_w8(rows + 8, output + 8, num_cols,
                                      num_rows);
}
static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform
av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform
@@ -2773,8 +2005,8 @@ static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
lowbd_fwd_txfm2d_64x64_avx2, // 64x64 transform
av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform
av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform
lowbd_fwd_txfm2d_8x16_avx2, // 8x16 transform
lowbd_fwd_txfm2d_16x8_avx2, // 16x8 transform
av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform
av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform
lowbd_fwd_txfm2d_16x32_avx2, // 16x32 transform
lowbd_fwd_txfm2d_32x16_avx2, // 32x16 transform
lowbd_fwd_txfm2d_32x64_avx2, // 32x64 transform
+3 -5
View File
@@ -101,8 +101,6 @@ set_aom_config_var(CONFIG_DENOISE 1 NUMBER
"Denoise/noise modeling support in encoder.")
set_aom_config_var(CONFIG_FILEOPTIONS 1 NUMBER
"Enables encoder config file support.")
set_aom_config_var(CONFIG_FIX_GF_LENGTH 1 NUMBER
"Fix the GF length if possible")
set_aom_config_var(CONFIG_INSPECTION 0 NUMBER "Enables bitstream inspection.")
set_aom_config_var(CONFIG_INTERNAL_STATS 0 NUMBER
"Enables internal encoder stats.")
@@ -118,8 +116,6 @@ set_aom_config_var(DECODE_HEIGHT_LIMIT 0 NUMBER "Set limit for decode height.")
set_aom_config_var(DECODE_WIDTH_LIMIT 0 NUMBER "Set limit for decode width.")
# AV1 experiment flags.
set_aom_config_var(CONFIG_COLLECT_INTER_MODE_RD_STATS 1 NUMBER
"AV1 experiment flag.")
set_aom_config_var(CONFIG_SPEED_STATS 0 NUMBER "AV1 experiment flag.")
set_aom_config_var(CONFIG_COLLECT_RD_STATS 0 NUMBER "AV1 experiment flag.")
set_aom_config_var(CONFIG_DIST_8X8 0 NUMBER "AV1 experiment flag.")
@@ -132,7 +128,9 @@ set_aom_config_var(CONFIG_2PASS_PARTITION_SEARCH_LVL 1 NUMBER
set_aom_config_var(CONFIG_SHARP_SETTINGS 0 NUMBER "AV1 experiment flag.")
set_aom_config_var(CONFIG_ONE_PASS_SVM 0 NUMBER "AV1 experiment flag.")
set_aom_config_var(CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1 NUMBER
"Disable full_pixel_motion_search_based_split on BLOCK_8X8")
"Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0 NUMBER
"Collect stats on partition decisions.")
#
# Variables in this section control optional features of the build system.
+5
View File
@@ -149,6 +149,11 @@ const AvxInterface *get_aom_encoder_by_name(const char *name) {
return NULL;
}
// Encoder descriptor used for large scale tile encoding: it keeps the "av1"
// name and the aom_codec_av1_cx interface but carries LST_FOURCC as its
// fourcc.
static const AvxInterface aom_lst_encoder = { "av1", LST_FOURCC,
&aom_codec_av1_cx };
// Returns a pointer to the file-static aom_lst_encoder descriptor; the caller
// does not own it.
const AvxInterface *get_aom_lst_encoder(void) { return &aom_lst_encoder; }
#endif // CONFIG_AV1_ENCODER
#if CONFIG_AV1_DECODER
+4
View File
@@ -85,6 +85,9 @@ enum {
NV12, // Tile output in NV12 format.
} UENUM1BYTE(OUTPUT_FORMAT);
// The fourcc for large_scale_tile encoding is "LSTC".
#define LST_FOURCC 0x4354534c
struct FileTypeDetectionBuffer {
char buf[4];
size_t buf_read;
@@ -150,6 +153,7 @@ typedef struct AvxInterface {
int get_aom_encoder_count(void);
const AvxInterface *get_aom_encoder_by_index(int i);
const AvxInterface *get_aom_encoder_by_name(const char *name);
const AvxInterface *get_aom_lst_encoder(void);
int get_aom_decoder_count(void);
const AvxInterface *get_aom_decoder_by_index(int i);
+4
View File
@@ -121,3 +121,7 @@ FILE *aom_video_reader_get_file(AvxVideoReader *reader) {
const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader) {
return &reader->info;
}
// Overwrites the codec fourcc held in the reader's info struct (the struct
// that aom_video_reader_get_info() exposes). `reader` is dereferenced without
// a null check.
void aom_video_reader_set_fourcc(AvxVideoReader *reader, uint32_t fourcc) {
reader->info.codec_fourcc = fourcc;
}
+3
View File
@@ -50,6 +50,9 @@ FILE *aom_video_reader_get_file(AvxVideoReader *reader);
// Fills AvxVideoInfo with information from opened video file.
const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader);
// Overwrites the codec fourcc stored in the reader's AvxVideoInfo.
void aom_video_reader_set_fourcc(AvxVideoReader *reader, uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
#endif
+4
View File
@@ -75,3 +75,7 @@ int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
return 1;
}
// Overwrites the codec fourcc held in the writer's info struct. `writer` is
// dereferenced without a null check.
void aom_video_writer_set_fourcc(AvxVideoWriter *writer, uint32_t fourcc) {
writer->info.codec_fourcc = fourcc;
}
+2
View File
@@ -37,6 +37,8 @@ void aom_video_writer_close(AvxVideoWriter *writer);
// Writes frame bytes to the file.
int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
size_t size, int64_t pts);
// Overwrites the codec fourcc stored in the writer's video info.
void aom_video_writer_set_fourcc(AvxVideoWriter *writer, uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
+72
View File
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
/*
* See build_av1_dec_fuzzer.sh for building instructions.
*/
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory>
#include "config/aom_config.h"
#include "aom/aom_decoder.h"
#include "aom/aomdx.h"
#include "aom_ports/mem_ops.h"
#include "common/ivfdec.h"
// Deleter for the std::unique_ptr<FILE, ...> created in
// LLVMFuzzerTestOneInput. The return value of fclose() is ignored.
static void close_file(FILE *file) { fclose(file); }
// Terminates the process with EXIT_FAILURE. Exported with C linkage; nothing
// in this file calls it, so it is presumably required by the linked helper
// sources (TODO confirm which one references it).
extern "C" void usage_exit(void) { exit(EXIT_FAILURE); }
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
std::unique_ptr<FILE, decltype(&close_file)> file(
fmemopen((void *)data, size, "rb"), &close_file);
if (file == nullptr) {
return 0;
}
char header[32];
if (fread(header, 1, 32, file.get()) != 32) {
return 0;
}
const AvxInterface *decoder = get_aom_decoder_by_name("av1");
if (decoder == nullptr) {
return 0;
}
aom_codec_ctx_t codec;
// Set thread count in the range [1, 64].
const unsigned int threads = (header[0] & 0x3f) + 1;
aom_codec_dec_cfg_t cfg = { threads, 0, 0, CONFIG_LOWBITDEPTH };
if (aom_codec_dec_init(&codec, decoder->codec_interface(), &cfg, 0)) {
return 0;
}
uint8_t *buffer = nullptr;
size_t buffer_size = 0;
size_t frame_size = 0;
while (!ivf_read_frame(file.get(), &buffer, &frame_size, &buffer_size,
nullptr)) {
const aom_codec_err_t err =
aom_codec_decode(&codec, buffer, frame_size, nullptr);
static_cast<void>(err);
aom_codec_iter_t iter = nullptr;
aom_image_t *img = nullptr;
while ((img = aom_codec_get_frame(&codec, &iter)) != nullptr) {
}
}
aom_codec_destroy(&codec);
free(buffer);
return 0;
}
+78
View File
@@ -0,0 +1,78 @@
#!/bin/bash
#
# Copyright (c) 2019, Alliance for Open Media. All rights reserved
#
# This source code is subject to the terms of the BSD 2 Clause License and
# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
# was not distributed with this source code in the LICENSE file, you can
# obtain it at www.aomedia.org/license/software. If the Alliance for Open
# Media Patent License 1.0 was not distributed with this source code in the
# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#
###############################################################################
# Fuzzer for libaom decoder.
# ==========================
# Requirements
# ---------------------
# Clang6.0 or above (must support -fsanitize=fuzzer)
#
# References:
# ---------------------
# http://llvm.org/docs/LibFuzzer.html
# https://github.com/google/oss-fuzz
#
# Steps to build / run
# ---------------------
set -eu

# Have a copy of AOM and a build directory ready.
if [[ $# -ne 2 ]]; then
  echo "Pass in the AOM source tree as first argument, and a build directory "
  echo "as the second argument. The AOM source tree can be obtained via: "
  echo " git clone https://aomedia.googlesource.com/aom"
  exit 2
fi

# Use ${VAR:-} so that an unset variable reaches the explanatory message
# instead of aborting with "unbound variable" because of `set -u` above.
if [[ -z "${CC:-}" ]]; then
  echo "Set the CC environment variable to point to your C compiler."
  exit 2
fi
if [[ -z "${CXX:-}" ]]; then
  echo "Set the CXX environment variable to point to your C++ compiler."
  exit 2
fi

AOM_DIR=$1
BUILD_DIR=$2

# Run CMake with address sanitizer enabled and build the codec.
# Enable DO_RANGE_CHECK_CLAMP to suppress the noise of integer overflows
# in the transform functions. Also set memory limits.
EXTRA_C_FLAGS='-DDO_RANGE_CHECK_CLAMP=1 -DAOM_MAX_ALLOCABLE_MEMORY=1073741824'
cd "${BUILD_DIR}"
cmake "${AOM_DIR}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCONFIG_PIC=1 \
  -DCONFIG_SCALABILITY=0 -DCONFIG_LOWBITDEPTH=1 -DCONFIG_AV1_ENCODER=0 \
  -DENABLE_EXAMPLES=0 -DENABLE_DOCS=0 -DENABLE_TESTS=0 -DCONFIG_SIZE_LIMIT=1 \
  -DDECODE_HEIGHT_LIMIT=12288 -DDECODE_WIDTH_LIMIT=12288 \
  -DAOM_EXTRA_C_FLAGS="${EXTRA_C_FLAGS}" \
  -DAOM_EXTRA_CXX_FLAGS="${EXTRA_C_FLAGS}" -DSANITIZE=address

# Build the codec.
make -j"$(nproc)"

# Build some libaom utils that are not part of the core lib.
# $CC and $CXX are intentionally left unquoted so that they may carry extra
# flags; every path is quoted so directories containing spaces work.
$CC -std=c99 -c -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  "${AOM_DIR}/common/ivfdec.c" -o "${BUILD_DIR}/ivfdec.o"

$CC -std=c99 -c -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  "${AOM_DIR}/common/tools_common.c" -o "${BUILD_DIR}/tools_common.o"

# Build the av1 fuzzer
$CXX -std=c++11 -DDECODER=av1 -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  -fsanitize=fuzzer -Wl,--start-group \
  "${AOM_DIR}/examples/av1_dec_fuzzer.cc" -o "${BUILD_DIR}/av1_dec_fuzzer" \
  "${BUILD_DIR}/libaom.a" "${BUILD_DIR}/ivfdec.o" \
  "${BUILD_DIR}/tools_common.o" \
  -Wl,--end-group

echo "Fuzzer built at ${BUILD_DIR}/av1_dec_fuzzer."
echo "Create a corpus directory, copy IVF files in there, and run:"
echo " av1_dec_fuzzer CORPUS_DIR"
@@ -211,6 +211,8 @@ int main(int argc, char **argv) {
num_references = (int)strtol(argv[3], NULL, 0);
info = aom_video_reader_get_info(reader);
aom_video_reader_set_fourcc(reader, AV1_FOURCC);
// The writer writes out an IVF file in tile list OBU format, which can be
// decoded by the AV1 decoder.
writer = aom_video_writer_open(argv[2], kContainerIVF, info);
+4 -2
View File
@@ -188,8 +188,10 @@ int main(int argc, char **argv) {
info = aom_video_reader_get_info(reader);
decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
if (!decoder) die("Unknown input codec.");
if (info->codec_fourcc == LST_FOURCC)
decoder = get_aom_decoder_by_fourcc(AV1_FOURCC);
else
die("Unknown input codec.");
printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
+4
View File
@@ -397,6 +397,10 @@ static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
for (i = 0; i < reference_image_num; i++) aom_img_free(&reference_images[i]);
if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
// Modify large_scale_file fourcc.
if (cfg->large_scale_tile == 1)
aom_video_writer_set_fourcc(writer, LST_FOURCC);
aom_video_writer_close(writer);
printf("\nSecond pass complete. Processed %d frames.\n", frame_count);
+1 -64
View File
@@ -287,67 +287,6 @@ void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
}
}
}
// Times |target_func| against the reference forward transform registered for
// |tx_size| in libaom_test::fwd_txfm_func_ls, for every transform type that is
// valid for that size, and prints both elapsed times together with their
// integer ratio. The whole measurement is performed twice.
void AV1FwdTxfm2dSpeedTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
  TxfmParam param;
  memset(&param, 0, sizeof(param));
  const int rows = tx_size_high[tx_size];
  const int cols = tx_size_wide[tx_size];
  // Scale the iteration count so each block size processes a similar number
  // of samples.
  const int num_loops = 1000000 / (rows * cols);

  for (int pass = 0; pass < 2; ++pass) {
    const int bd = 8;
    for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      if (libaom_test::IsTxSizeTypeValid(
              tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
        continue;
      }

      FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
      if (ref_func != NULL) {
        DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
        DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
        DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
        int input_stride = 64;
        ACMRandom rnd(ACMRandom::DeterministicSeed());

        // Fill the rows x cols block with values in [0, 2^bd).
        for (int r = 0; r < rows; ++r) {
          for (int c = 0; c < cols; ++c) {
            input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
          }
        }

        param.tx_type = (TX_TYPE)tx_type;
        param.tx_size = (TX_SIZE)tx_size;
        param.tx_set_type = EXT_TX_SET_ALL16;
        param.bd = bd;

        aom_usec_timer ref_timer, test_timer;

        aom_usec_timer_start(&ref_timer);
        for (int loop = 0; loop < num_loops; ++loop) {
          ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
        }
        aom_usec_timer_mark(&ref_timer);
        const int elapsed_time_c =
            static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

        aom_usec_timer_start(&test_timer);
        for (int loop = 0; loop < num_loops; ++loop) {
          target_func(input, output, input_stride, &param);
        }
        aom_usec_timer_mark(&test_timer);
        const int elapsed_time_simd =
            static_cast<int>(aom_usec_timer_elapsed(&test_timer));

        // A measured time of 0 would make the ratio a division by zero;
        // report a gain of 0 in that case.
        const int gain =
            (elapsed_time_simd > 0) ? (elapsed_time_c / elapsed_time_simd) : 0;

        printf(
            "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
            "gain=%d \n",
            tx_size, tx_type, elapsed_time_c, elapsed_time_simd, gain);
      }
    }
  }
}
// (TX_SIZE, lowbd_fwd_txfm_func) pair used to parameterize AV1FwdTxfm2dTest.
typedef ::testing::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
@@ -356,9 +295,7 @@ class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
// Runs AV1FwdTxfm2dMatchTest with this instance's parameters: GET_PARAM(0) and
// GET_PARAM(1) (presumably the TX_SIZE and the function under test from
// LbdFwdTxfm2dParam — confirm against GET_PARAM's definition).
TEST_P(AV1FwdTxfm2dTest, match) {
AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
}
// Runs AV1FwdTxfm2dSpeedTest with this instance's parameters. The DISABLED_
// name prefix means googletest skips it unless disabled tests are explicitly
// requested.
TEST_P(AV1FwdTxfm2dTest, DISABLED_Speed) {
AV1FwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
}
using ::testing::Combine;
using ::testing::Values;
using ::testing::ValuesIn;
+3
View File
@@ -411,6 +411,9 @@ INSTANTIATE_TEST_CASE_P(
TX_16X16, TYPE_B, AOM_BITS_8),
make_tuple(&aom_quantize_b_32x32_c,
&aom_quantize_b_32x32_ssse3, TX_32X32, TYPE_B,
AOM_BITS_8),
make_tuple(&aom_quantize_b_64x64_c,
&aom_quantize_b_64x64_ssse3, TX_64X64, TYPE_B,
AOM_BITS_8)));
#endif // HAVE_SSSE3 && ARCH_X86_64
+1 -1
View File
@@ -297,7 +297,7 @@ class ResizeInternalTestLarge : public ResizeTest {
// Stores the PSNR of the incoming packet as the baseline while frame0_psnr_
// is still 0, then expects the packet's PSNR to stay within a fixed tolerance
// of that baseline.
virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
// NOTE(review): two tolerances (2.5 and 3.0) are checked back to back; the
// 3.0 check can never fail when the 2.5 check passes. This looks like the old
// and new side of a change shown together — confirm which one is intended.
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.5);
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 3.0);
}
#if WRITE_COMPRESSED_STREAM