mirror of
https://github.com/roytam1/basilisk55.git
synced 2026-05-26 15:02:46 +00:00
update libaom to rev 0ec86ac7ae1e32a7e70410fa4972a655ec3670a4 (without moz.build and aom_ports/aom_once.h)
This commit is contained in:
@@ -10,4 +10,4 @@ The upstream aom git repository is:
|
||||
|
||||
https://aomedia.googlesource.com/aom
|
||||
|
||||
The git commit ID used was b46542180d551d5e4eb666cf35dd62395ba43f3e.
|
||||
The git commit ID used was 0ec86ac7ae1e32a7e70410fa4972a655ec3670a4.
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
.equ CONFIG_BIG_ENDIAN, 0
|
||||
.equ CONFIG_BITSTREAM_DEBUG, 0
|
||||
.equ CONFIG_COEFFICIENT_RANGE_CHECKING, 0
|
||||
.equ CONFIG_COLLECT_INTER_MODE_RD_STATS, 0
|
||||
.equ CONFIG_COLLECT_PARTITION_STATS, 0
|
||||
.equ CONFIG_COLLECT_RD_STATS, 0
|
||||
.equ CONFIG_DEBUG, 0
|
||||
.equ CONFIG_DENOISE, 1
|
||||
@@ -30,7 +30,6 @@
|
||||
.equ CONFIG_DIST_8X8, 0
|
||||
.equ CONFIG_ENTROPY_STATS, 0
|
||||
.equ CONFIG_FILEOPTIONS, 1
|
||||
.equ CONFIG_FIX_GF_LENGTH, 1
|
||||
.equ CONFIG_FP_MB_STATS, 0
|
||||
.equ CONFIG_GCC, 1
|
||||
.equ CONFIG_GCOV, 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
@@ -22,7 +22,7 @@ CONFIG_AV1_ENCODER equ 0
|
||||
CONFIG_BIG_ENDIAN equ 0
|
||||
CONFIG_BITSTREAM_DEBUG equ 0
|
||||
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
||||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
|
||||
CONFIG_COLLECT_PARTITION_STATS equ 0
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 1
|
||||
@@ -30,7 +30,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
|
||||
CONFIG_DIST_8X8 equ 0
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
CONFIG_FIX_GF_LENGTH equ 1
|
||||
CONFIG_FP_MB_STATS equ 0
|
||||
CONFIG_GCC equ 1
|
||||
CONFIG_GCOV equ 0
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define CONFIG_BIG_ENDIAN 0
|
||||
#define CONFIG_BITSTREAM_DEBUG 0
|
||||
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
|
||||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
|
||||
#define CONFIG_COLLECT_PARTITION_STATS 0
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 1
|
||||
@@ -32,7 +32,6 @@
|
||||
#define CONFIG_DIST_8X8 0
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
#define CONFIG_FIX_GF_LENGTH 1
|
||||
#define CONFIG_FP_MB_STATS 0
|
||||
#define CONFIG_GCC 1
|
||||
#define CONFIG_GCOV 0
|
||||
|
||||
Vendored
+25
@@ -973,9 +973,22 @@ enum aome_enc_control_id {
|
||||
/*!\brief Control to use a reduced tx type set */
|
||||
AV1E_SET_REDUCED_TX_TYPE_SET,
|
||||
|
||||
/*!\brief Control to use dct only for intra modes */
|
||||
AV1E_SET_INTRA_DCT_ONLY,
|
||||
|
||||
/*!\brief Control to use dct only for inter modes */
|
||||
AV1E_SET_INTER_DCT_ONLY,
|
||||
|
||||
/*!\brief Control to use adaptive quantize_b */
|
||||
AV1E_SET_QUANT_B_ADAPT,
|
||||
|
||||
/*!\brief Control to select maximum height for the GF group pyramid structure
|
||||
* (valid values: 1 - 4) */
|
||||
AV1E_SET_GF_MAX_PYRAMID_HEIGHT,
|
||||
|
||||
/*!\brief Control to select maximum reference frames allowed per frame
|
||||
* (valid values: 3 - 7) */
|
||||
AV1E_SET_MAX_REFERENCE_FRAMES,
|
||||
};
|
||||
|
||||
/*!\brief aom 1-D scaling mode
|
||||
@@ -1350,9 +1363,21 @@ AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SUBSAMPLING_Y, unsigned int)
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_REDUCED_TX_TYPE_SET, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_REDUCED_TX_TYPE_SET
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_INTRA_DCT_ONLY, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_INTRA_DCT_ONLY
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_INTER_DCT_ONLY, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_INTER_DCT_ONLY
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_QUANT_B_ADAPT, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_QUANT_B_ADAPT
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_GF_MAX_PYRAMID_HEIGHT, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_GF_MAX_PYRAMID_HEIGHT
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_MAX_REFERENCE_FRAMES, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_MAX_REFERENCE_FRAMES
|
||||
|
||||
/*!\endcond */
|
||||
/*! @} - end defgroup aom_encoder */
|
||||
#ifdef __cplusplus
|
||||
|
||||
+1
@@ -226,6 +226,7 @@ if(CONFIG_AV1_ENCODER)
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/quantize_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/variance_impl_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")
|
||||
|
||||
@@ -522,6 +522,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
||||
specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
|
||||
|
||||
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/aom_quantize_b_64x64 ssse3/;
|
||||
} # CONFIG_AV1_ENCODER
|
||||
|
||||
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
||||
|
||||
Vendored
+194
@@ -12,6 +12,68 @@
|
||||
#include "aom_dsp/quantize.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
// Quantizes the coefficients of one transform block in scan order, after
// first trimming the tail of the scan.
//
// Pre-scan: walking backwards from the last scan position, coefficients whose
// matrix-weighted value lies strictly inside
//   +/- (zbin * (1 << AOM_QM_BITS) + ROUND_POWER_OF_TWO(dequant * 325, 7))
// are dropped; the walk stops at the first coefficient outside that window.
// Quantization: the remaining leading positions are quantized when their
// weighted magnitude reaches the plain zbin threshold.
//
// qcoeff_ptr and dqcoeff_ptr are zero-filled for n_coeffs entries up front, so
// every skipped position reads back as 0. *eob_ptr receives one plus the last
// scan position that produced a non-zero quantized value (0 if none did).
// qm_ptr / iqm_ptr may be NULL, in which case a flat weight of
// (1 << AOM_QM_BITS) is used. iscan is unused.
void quantize_b_adaptive_helper_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
    const qm_val_t *iqm_ptr, const int log_scale) {
  (void)iscan;

  // Index 0 holds the value for rc == 0, index 1 the value for all other rc.
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
  const int nzbins[2] = { -zbins[0], -zbins[1] };
  int non_zero_count = (int)n_coeffs;
  int last_nonzero = -1;
  int pos;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass: shrink non_zero_count while the trailing coefficient stays
  // strictly inside the widened dead zone.
  while (non_zero_count > 0) {
    const int rc = scan[non_zero_count - 1];
    const int is_ac = rc != 0;
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const int coeff = coeff_ptr[rc] * wt;
    const int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[is_ac] * 325, 7);
    const int upper = zbins[is_ac] * (1 << AOM_QM_BITS) + prescan_add;
    const int lower = nzbins[is_ac] * (1 << AOM_QM_BITS) - prescan_add;

    if (coeff >= upper || coeff <= lower) break;
    non_zero_count--;
  }

  // Quantization pass: all coefficients at scan positions >= non_zero_count
  // are skippable. Note: non_zero_count can be zero.
  for (pos = 0; pos < non_zero_count; pos++) {
    const int rc = scan[pos];
    const int is_ac = rc != 0;
    const int coeff = coeff_ptr[rc];
    // coeff_sign is 0 for non-negative input and -1 (all ones) for negative
    // input; (x ^ s) - s then negates x exactly when s is -1.
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);

    // Below the zero-bin threshold: leave the zero-filled outputs untouched.
    if (abs_coeff * wt < (zbins[is_ac] << AOM_QM_BITS)) continue;

    const int64_t rounded =
        clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[is_ac], log_scale),
              INT16_MIN, INT16_MAX);
    const int64_t weighted = rounded * wt;
    const int abs_qcoeff =
        (int)(((((weighted * quant_ptr[is_ac]) >> 16) + weighted) *
               quant_shift_ptr[is_ac]) >>
              (16 - log_scale + AOM_QM_BITS));  // quantization
    qcoeff_ptr[rc] = (abs_qcoeff ^ coeff_sign) - coeff_sign;

    const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
    const int dequant =
        (dequant_ptr[is_ac] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);

    if (abs_qcoeff) last_nonzero = pos;
  }

  *eob_ptr = last_nonzero + 1;
}
|
||||
|
||||
void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
@@ -74,6 +136,64 @@ void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
// High-bitdepth adaptive quantizer for one transform block.
//
// Pre-scan: every scan position whose matrix-weighted coefficient lies at or
// beyond
//   +/- (zbin * (1 << AOM_QM_BITS) + ROUND_POWER_OF_TWO(dequant * 325, 7))
// is recorded in idx_arr. Quantization: only the recorded positions are
// quantized and dequantized.
//
// qcoeff_ptr and dqcoeff_ptr are zero-filled for n_coeffs entries up front, so
// every position that is not selected reads back as 0. *eob_ptr receives one
// plus the last selected scan position that produced a non-zero quantized
// value (0 if none did). qm_ptr / iqm_ptr may be NULL, in which case a flat
// weight of (1 << AOM_QM_BITS) is used. iscan is unused.
//
// NOTE(review): idx_arr holds 4096 entries and is not bounds-checked, so this
// relies on callers passing n_coeffs <= 4096 -- confirm against callers.
void highbd_quantize_b_adaptive_helper_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
    const qm_val_t *iqm_ptr, const int log_scale) {
  int i, eob = -1;
  // Index 0 holds the value for rc == 0, index 1 the value for all other rc.
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
  int dequant;
  int idx_arr[4096];
  (void)iscan;
  int idx = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const int coeff = coeff_ptr[rc] * wt;

    // If the coefficient is out of the widened ZBIN range, keep it for
    // quantization. The two bounds lie on opposite sides of zero, so the
    // tests must be OR-ed: a value cannot be above the positive bound and
    // below the negative bound at the same time.
    int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[rc != 0] * 325, 7);
    if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS) + prescan_add) ||
        coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS) - prescan_add))
      idx_arr[idx++] = i;
  }

  // Quantization pass: only process the coefficients selected in
  // pre-scan pass. Note: idx can be zero.
  for (i = 0; i < idx; i++) {
    const int rc = scan[idx_arr[i]];
    const int coeff = coeff_ptr[rc];
    // coeff_sign is 0 for non-negative input and -1 (all ones) for negative
    // input; (x ^ s) - s then negates x exactly when s is -1.
    const int coeff_sign = (coeff >> 31);
    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
    const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    // 64-bit intermediates: unlike the low-bitdepth helper there is no clamp
    // to the int16 range before the multiplies.
    const int64_t tmp1 =
        abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
    const int64_t tmpw = tmp1 * wt;
    const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
    const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
                                 (16 - log_scale + AOM_QM_BITS));
    qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
    dequant =
        (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
    // idx_arr is filled in increasing scan order, so the last assignment is
    // the highest scan position with a non-zero quantized value.
    if (abs_qcoeff) eob = idx_arr[i];
  }
  *eob_ptr = eob + 1;
}
|
||||
|
||||
void highbd_quantize_b_helper_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
@@ -133,6 +253,80 @@ void highbd_quantize_b_helper_c(
|
||||
|
||||
/* These functions should only be called when quantisation matrices
|
||||
are not used. */
|
||||
// Adaptive quantizer entry point: forwards all arguments to
// quantize_b_adaptive_helper_c() with no quantization matrices (NULL qm/iqm)
// and log_scale 0.
void aom_quantize_b_adaptive_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               const int16_t *zbin_ptr,
                               const int16_t *round_ptr,
                               const int16_t *quant_ptr,
                               const int16_t *quant_shift_ptr,
                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                               const int16_t *dequant_ptr, uint16_t *eob_ptr,
                               const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 0;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
// Adaptive quantizer entry point: forwards all arguments to
// quantize_b_adaptive_helper_c() with no quantization matrices (NULL qm/iqm)
// and log_scale 1.
void aom_quantize_b_32x32_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 1;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
// Adaptive quantizer entry point: forwards all arguments to
// quantize_b_adaptive_helper_c() with no quantization matrices (NULL qm/iqm)
// and log_scale 2.
void aom_quantize_b_64x64_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 2;
  quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
                               quant_ptr, quant_shift_ptr, qcoeff_ptr,
                               dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                               /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
// High-bitdepth adaptive quantizer entry point: forwards all arguments to
// highbd_quantize_b_adaptive_helper_c() with no quantization matrices (NULL
// qm/iqm) and log_scale 0.
void aom_highbd_quantize_b_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 0;
  highbd_quantize_b_adaptive_helper_c(
      coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
      /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
// High-bitdepth adaptive quantizer entry point: forwards all arguments to
// highbd_quantize_b_adaptive_helper_c() with no quantization matrices (NULL
// qm/iqm) and log_scale 1.
void aom_highbd_quantize_b_32x32_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 1;
  highbd_quantize_b_adaptive_helper_c(
      coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
      /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
// High-bitdepth adaptive quantizer entry point: forwards all arguments to
// highbd_quantize_b_adaptive_helper_c() with no quantization matrices (NULL
// qm/iqm) and log_scale 2.
void aom_highbd_quantize_b_64x64_adaptive_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int log_scale = 2;
  highbd_quantize_b_adaptive_helper_c(
      coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
      /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
}
|
||||
|
||||
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
|
||||
|
||||
Vendored
+60
@@ -20,6 +20,66 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void quantize_b_adaptive_helper_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
|
||||
const qm_val_t *iqm_ptr, const int log_scale);
|
||||
|
||||
void aom_quantize_b_adaptive_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void aom_quantize_b_32x32_adaptive_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void aom_quantize_b_64x64_adaptive_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void highbd_quantize_b_adaptive_helper_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
|
||||
const qm_val_t *iqm_ptr, const int log_scale);
|
||||
|
||||
void aom_highbd_quantize_b_adaptive_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void aom_highbd_quantize_b_32x32_adaptive_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void aom_highbd_quantize_b_64x64_adaptive_c(
|
||||
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr, const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
|
||||
-22
@@ -18,28 +18,6 @@
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/x86/quantize_x86.h"
|
||||
|
||||
// Gathers eight consecutive tran_low_t coefficients into one vector of eight
// 16-bit lanes, with coeff_ptr[0] in the lowest lane. Each value is narrowed
// with a plain (int16_t) cast, i.e. truncated rather than saturated.
static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
  assert(sizeof(tran_low_t) == 4);

  const int16_t c0 = (int16_t)coeff_ptr[0];
  const int16_t c1 = (int16_t)coeff_ptr[1];
  const int16_t c2 = (int16_t)coeff_ptr[2];
  const int16_t c3 = (int16_t)coeff_ptr[3];
  const int16_t c4 = (int16_t)coeff_ptr[4];
  const int16_t c5 = (int16_t)coeff_ptr[5];
  const int16_t c6 = (int16_t)coeff_ptr[6];
  const int16_t c7 = (int16_t)coeff_ptr[7];

  return _mm_setr_epi16(c0, c1, c2, c3, c4, c5, c6, c7);
}
|
||||
|
||||
static INLINE void store_coefficients(__m128i coeff_vals,
|
||||
tran_low_t *coeff_ptr) {
|
||||
assert(sizeof(tran_low_t) == 4);
|
||||
|
||||
__m128i one = _mm_set1_epi16(1);
|
||||
__m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);
|
||||
__m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);
|
||||
__m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);
|
||||
__m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);
|
||||
_mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1);
|
||||
_mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2);
|
||||
}
|
||||
|
||||
void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
|
||||
+192
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/x86/quantize_x86.h"
|
||||
|
||||
// In-place quantization step on eight 16-bit lanes:
//   t      = saturating_add(*coeff, round)
//   t      = t + high16(t * quant)
//   *coeff = (t * *shift) >> 14, truncated to 16 bits
// The final 32-bit product is not materialised: its low half shifted right by
// 14 supplies bits 0-1 of the result and its high half shifted left by 2
// supplies bits 2-15.
static INLINE void calculate_qcoeff_64x64(__m128i *coeff, const __m128i round,
                                          const __m128i quant,
                                          const __m128i *shift) {
  const __m128i rounded = _mm_adds_epi16(*coeff, round);
  const __m128i scaled =
      _mm_add_epi16(_mm_mulhi_epi16(rounded, quant), rounded);

  const __m128i prod_lo = _mm_mullo_epi16(scaled, *shift);
  const __m128i prod_hi = _mm_mulhi_epi16(scaled, *shift);

  const __m128i low_bits = _mm_srli_epi16(prod_lo, 14);
  const __m128i high_bits = _mm_slli_epi16(prod_hi, 2);

  *coeff = _mm_or_si128(low_bits, high_bits);
}
|
||||
|
||||
static INLINE void calculate_dqcoeff_and_store_64x64(const __m128i qcoeff,
|
||||
const __m128i dequant,
|
||||
const __m128i zero,
|
||||
tran_low_t *dqcoeff) {
|
||||
// Un-sign to bias rounding like C.
|
||||
const __m128i coeff = _mm_abs_epi16(qcoeff);
|
||||
|
||||
const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
|
||||
const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);
|
||||
|
||||
const __m128i low = _mm_mullo_epi16(coeff, dequant);
|
||||
const __m128i high = _mm_mulhi_epi16(coeff, dequant);
|
||||
__m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high);
|
||||
__m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high);
|
||||
|
||||
// "Divide" by 4.
|
||||
dqcoeff32_0 = _mm_srli_epi32(dqcoeff32_0, 2);
|
||||
dqcoeff32_1 = _mm_srli_epi32(dqcoeff32_1, 2);
|
||||
|
||||
dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);
|
||||
dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);
|
||||
|
||||
_mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);
|
||||
_mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);
|
||||
}
|
||||
|
||||
void aom_quantize_b_64x64_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i one = _mm_set1_epi16(1);
|
||||
const __m128i two = _mm_set1_epi16(2);
|
||||
int index;
|
||||
|
||||
__m128i zbin, round, quant, dequant, shift;
|
||||
__m128i coeff0, coeff1, qcoeff0, qcoeff1;
|
||||
__m128i cmp_mask0, cmp_mask1, all_zero;
|
||||
__m128i eob = zero, eob0;
|
||||
|
||||
(void)scan;
|
||||
(void)n_coeffs;
|
||||
|
||||
// Setup global values.
|
||||
zbin = _mm_load_si128((const __m128i *)zbin_ptr);
|
||||
round = _mm_load_si128((const __m128i *)round_ptr);
|
||||
quant = _mm_load_si128((const __m128i *)quant_ptr);
|
||||
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
|
||||
shift = _mm_load_si128((const __m128i *)quant_shift_ptr);
|
||||
|
||||
// Shift with rounding.
|
||||
zbin = _mm_add_epi16(zbin, two);
|
||||
round = _mm_add_epi16(round, two);
|
||||
zbin = _mm_srli_epi16(zbin, 2);
|
||||
round = _mm_srli_epi16(round, 2);
|
||||
zbin = _mm_sub_epi16(zbin, one);
|
||||
// Do DC and first 15 AC.
|
||||
coeff0 = load_coefficients(coeff_ptr);
|
||||
coeff1 = load_coefficients(coeff_ptr + 8);
|
||||
|
||||
qcoeff0 = _mm_abs_epi16(coeff0);
|
||||
qcoeff1 = _mm_abs_epi16(coeff1);
|
||||
|
||||
cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
|
||||
zbin = _mm_unpackhi_epi64(zbin, zbin);
|
||||
cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
|
||||
all_zero = _mm_or_si128(cmp_mask0, cmp_mask1);
|
||||
if (_mm_movemask_epi8(all_zero) == 0) {
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + 4), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + 8), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + 12), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + 4), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + 8), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + 12), zero);
|
||||
round = _mm_unpackhi_epi64(round, round);
|
||||
quant = _mm_unpackhi_epi64(quant, quant);
|
||||
shift = _mm_unpackhi_epi64(shift, shift);
|
||||
dequant = _mm_unpackhi_epi64(dequant, dequant);
|
||||
} else {
|
||||
calculate_qcoeff_64x64(&qcoeff0, round, quant, &shift);
|
||||
round = _mm_unpackhi_epi64(round, round);
|
||||
quant = _mm_unpackhi_epi64(quant, quant);
|
||||
shift = _mm_unpackhi_epi64(shift, shift);
|
||||
calculate_qcoeff_64x64(&qcoeff1, round, quant, &shift);
|
||||
|
||||
// Reinsert signs.
|
||||
qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0);
|
||||
qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1);
|
||||
|
||||
// Mask out zbin threshold coeffs.
|
||||
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
|
||||
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
|
||||
|
||||
store_coefficients(qcoeff0, qcoeff_ptr);
|
||||
store_coefficients(qcoeff1, qcoeff_ptr + 8);
|
||||
|
||||
calculate_dqcoeff_and_store_64x64(qcoeff0, dequant, zero, dqcoeff_ptr);
|
||||
dequant = _mm_unpackhi_epi64(dequant, dequant);
|
||||
calculate_dqcoeff_and_store_64x64(qcoeff1, dequant, zero, dqcoeff_ptr + 8);
|
||||
|
||||
eob =
|
||||
scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero);
|
||||
}
|
||||
|
||||
// AC only loop.
|
||||
for (index = 16; index < 1024; index += 16) {
|
||||
coeff0 = load_coefficients(coeff_ptr + index);
|
||||
coeff1 = load_coefficients(coeff_ptr + index + 8);
|
||||
|
||||
qcoeff0 = _mm_abs_epi16(coeff0);
|
||||
qcoeff1 = _mm_abs_epi16(coeff1);
|
||||
|
||||
cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
|
||||
cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
|
||||
|
||||
all_zero = _mm_or_si128(cmp_mask0, cmp_mask1);
|
||||
if (_mm_movemask_epi8(all_zero) == 0) {
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + index), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 4), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 8), zero);
|
||||
_mm_store_si128((__m128i *)(qcoeff_ptr + index + 12), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + index), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 4), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 8), zero);
|
||||
_mm_store_si128((__m128i *)(dqcoeff_ptr + index + 12), zero);
|
||||
continue;
|
||||
}
|
||||
calculate_qcoeff_64x64(&qcoeff0, round, quant, &shift);
|
||||
calculate_qcoeff_64x64(&qcoeff1, round, quant, &shift);
|
||||
|
||||
qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0);
|
||||
qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1);
|
||||
|
||||
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
|
||||
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
|
||||
|
||||
store_coefficients(qcoeff0, qcoeff_ptr + index);
|
||||
store_coefficients(qcoeff1, qcoeff_ptr + index + 8);
|
||||
|
||||
calculate_dqcoeff_and_store_64x64(qcoeff0, dequant, zero,
|
||||
dqcoeff_ptr + index);
|
||||
calculate_dqcoeff_and_store_64x64(qcoeff1, dequant, zero,
|
||||
dqcoeff_ptr + 8 + index);
|
||||
|
||||
eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index,
|
||||
zero);
|
||||
eob = _mm_max_epi16(eob, eob0);
|
||||
}
|
||||
|
||||
*eob_ptr = accumulate_eob(eob);
|
||||
}
|
||||
+20
@@ -75,3 +75,23 @@ static INLINE int16_t accumulate_eob(__m128i eob) {
|
||||
eob = _mm_max_epi16(eob, eob_shuffled);
|
||||
return _mm_extract_epi16(eob, 1);
|
||||
}
|
||||
|
||||
// Loads eight 32-bit coefficients starting at coeff_ptr (two aligned 128-bit
// loads) and narrows them to eight 16-bit lanes with signed saturation.
// Requires tran_low_t to be 4 bytes wide, which the assert checks.
static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
  assert(sizeof(tran_low_t) == 4);
  // Cast to a const vector pointer so the const qualifier of coeff_ptr is
  // preserved. _mm_load_si128 accepts a pointer to const.
  const __m128i first_four = _mm_load_si128((const __m128i *)(coeff_ptr));
  const __m128i last_four = _mm_load_si128((const __m128i *)(coeff_ptr + 4));
  return _mm_packs_epi32(first_four, last_four);
}
|
||||
|
||||
static INLINE void store_coefficients(__m128i coeff_vals,
|
||||
tran_low_t *coeff_ptr) {
|
||||
assert(sizeof(tran_low_t) == 4);
|
||||
|
||||
__m128i one = _mm_set1_epi16(1);
|
||||
__m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);
|
||||
__m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);
|
||||
__m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);
|
||||
__m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);
|
||||
_mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1);
|
||||
_mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2);
|
||||
}
|
||||
|
||||
-82
@@ -167,35 +167,7 @@ static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
|
||||
out[6 + 8] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x31);
|
||||
out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
|
||||
}
|
||||
// Rearranges 16-bit elements of in[0..7] into out[0..7] through three
// interleave stages (16-bit, 32-bit, then 64-bit unpacks). The AVX2 unpack
// instructions work independently on each 128-bit half of a register.
// All inputs are read before any output is written.
static INLINE void transpose_16bit_16x8_avx2(const __m256i *const in,
                                             __m256i *const out) {
  // Index of the first 32-bit-stage register feeding each output pair.
  static const int pair_src[4] = { 0, 4, 2, 6 };
  __m256i stage16[8];
  __m256i stage32[8];
  int i;

  // Stage 1: interleave 16-bit elements of adjacent input registers.
  for (i = 0; i < 4; ++i) {
    stage16[i] = _mm256_unpacklo_epi16(in[2 * i], in[2 * i + 1]);
    stage16[i + 4] = _mm256_unpackhi_epi16(in[2 * i], in[2 * i + 1]);
  }

  // Stage 2: interleave 32-bit elements of adjacent stage-1 registers.
  for (i = 0; i < 4; ++i) {
    stage32[i] = _mm256_unpacklo_epi32(stage16[2 * i], stage16[2 * i + 1]);
    stage32[i + 4] = _mm256_unpackhi_epi32(stage16[2 * i], stage16[2 * i + 1]);
  }

  // Stage 3: interleave 64-bit elements; each source pair yields two outputs.
  for (i = 0; i < 4; ++i) {
    const __m256i even = stage32[pair_src[i]];
    const __m256i odd = stage32[pair_src[i] + 1];
    out[2 * i] = _mm256_unpacklo_epi64(even, odd);
    out[2 * i + 1] = _mm256_unpackhi_epi64(even, odd);
  }
}
|
||||
static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
out[size - i - 1] = in[i];
|
||||
@@ -263,61 +235,7 @@ static INLINE void av1_round_shift_rect_array_32_avx2(__m256i *input,
|
||||
}
|
||||
}
|
||||
}
|
||||
// Multiply-adds each adjacent pair of 16-bit lanes of `a` with the pair
// (scale, 1 << (NewSqrt2Bits - 1)), then arithmetic-shifts each 32-bit sum
// right by NewSqrt2Bits. The lane order of the multiplier pair is determined
// by pair_set_w16_epi16, which is defined elsewhere.
static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
  const __m256i multipliers =
      pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1));
  return _mm256_srai_epi32(_mm256_madd_epi16(a, multipliers), NewSqrt2Bits);
}
|
||||
static INLINE void store_rect_16bit_to_32bit_w8_avx2(const __m256i a,
|
||||
int32_t *const b) {
|
||||
const __m256i one = _mm256_set1_epi16(1);
|
||||
const __m256i a_lo = _mm256_unpacklo_epi16(a, one);
|
||||
const __m256i a_hi = _mm256_unpackhi_epi16(a, one);
|
||||
const __m256i b_lo = scale_round_avx2(a_lo, NewSqrt2);
|
||||
const __m256i b_hi = scale_round_avx2(a_hi, NewSqrt2);
|
||||
const __m256i temp = _mm256_permute2f128_si256(b_lo, b_hi, 0x31);
|
||||
_mm_store_si128((__m128i *)b, _mm256_castsi256_si128(b_lo));
|
||||
_mm_store_si128((__m128i *)(b + 4), _mm256_castsi256_si128(b_hi));
|
||||
_mm256_store_si256((__m256i *)(b + 64), temp);
|
||||
}
|
||||
// Applies store_rect_16bit_to_32bit_w8_avx2 to in[0..out_size-1], writing
// register i at out + i * stride. Does nothing when out_size <= 0.
static INLINE void store_rect_buffer_16bit_to_32bit_w8_avx2(
    const __m256i *const in, int32_t *const out, const int stride,
    const int out_size) {
  int i = 0;
  while (i < out_size) {
    int32_t *const dst = out + i * stride;
    store_rect_16bit_to_32bit_w8_avx2(in[i], dst);
    ++i;
  }
}
|
||||
// Builds eight 256-bit registers: out[i] holds in1[i] in its lower 128 bits
// and in2[i] in its upper 128 bits, for i = 0..7.
static INLINE void pack_reg(const __m128i *in1, const __m128i *in2,
                            __m256i *out) {
  int i;
  for (i = 0; i < 8; ++i) {
    const __m256i lower = _mm256_castsi128_si256(in1[i]);
    out[i] = _mm256_insertf128_si256(lower, in2[i], 0x1);
  }
}
|
||||
// Splits eight 256-bit registers into sixteen 128-bit registers:
// out1[i] receives the lower half of in[i] and out1[i + 8] the upper half,
// for i = 0..7. All lower halves are written before any upper half.
static INLINE void extract_reg(const __m256i *in, __m128i *out1) {
  int i;
  for (i = 0; i < 8; ++i) {
    out1[i] = _mm256_castsi256_si128(in[i]);
  }
  for (i = 0; i < 8; ++i) {
    out1[i + 8] = _mm256_extractf128_si256(in[i], 0x01);
  }
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
+1
-1
@@ -26,7 +26,7 @@ extern "C" {
|
||||
#define AOM_INTERP_EXTEND 4
|
||||
#define AOM_BORDER_IN_PIXELS 288
|
||||
#define AOM_ENC_NO_SCALE_BORDER 160
|
||||
#define AOM_DEC_BORDER_IN_PIXELS 288
|
||||
#define AOM_DEC_BORDER_IN_PIXELS 64
|
||||
|
||||
typedef struct yv12_buffer_config {
|
||||
union {
|
||||
|
||||
Vendored
+8
-1
@@ -484,6 +484,7 @@ static int main_loop(int argc, const char **argv_) {
|
||||
input.webm_ctx = &webm_ctx;
|
||||
#endif
|
||||
struct ObuDecInputContext obu_ctx = { NULL, NULL, 0, 0, 0 };
|
||||
int is_ivf = 0;
|
||||
|
||||
obu_ctx.avx_ctx = &aom_input_ctx;
|
||||
input.obu_ctx = &obu_ctx;
|
||||
@@ -610,8 +611,10 @@ static int main_loop(int argc, const char **argv_) {
|
||||
#endif
|
||||
input.aom_input_ctx->filename = fn;
|
||||
input.aom_input_ctx->file = infile;
|
||||
if (file_is_ivf(input.aom_input_ctx))
|
||||
if (file_is_ivf(input.aom_input_ctx)) {
|
||||
input.aom_input_ctx->file_type = FILE_TYPE_IVF;
|
||||
is_ivf = 1;
|
||||
}
|
||||
#if CONFIG_WEBM_IO
|
||||
else if (file_is_webm(input.webm_ctx, input.aom_input_ctx))
|
||||
input.aom_input_ctx->file_type = FILE_TYPE_WEBM;
|
||||
@@ -661,6 +664,10 @@ static int main_loop(int argc, const char **argv_) {
|
||||
}
|
||||
|
||||
fourcc_interface = get_aom_decoder_by_fourcc(aom_input_ctx.fourcc);
|
||||
|
||||
if (is_ivf && !fourcc_interface)
|
||||
fatal("Unsupported fourcc: %x\n", aom_input_ctx.fourcc);
|
||||
|
||||
if (interface && fourcc_interface && interface != fourcc_interface)
|
||||
warn("Header indicates codec: %s\n", fourcc_interface->name);
|
||||
else
|
||||
|
||||
Vendored
+27
-5
@@ -263,9 +263,9 @@ static const arg_def_t global_error_resilient =
|
||||
"Enable global error resiliency features");
|
||||
static const arg_def_t lag_in_frames =
|
||||
ARG_DEF(NULL, "lag-in-frames", 1, "Max number of frames to lag");
|
||||
static const arg_def_t large_scale_tile =
|
||||
ARG_DEF(NULL, "large-scale-tile", 1,
|
||||
"Large scale tile coding (0: off (default), 1: on)");
|
||||
static const arg_def_t large_scale_tile = ARG_DEF(
|
||||
NULL, "large-scale-tile", 1,
|
||||
"Large scale tile coding (0: off (default), 1: on (ivf output only))");
|
||||
static const arg_def_t monochrome =
|
||||
ARG_DEF(NULL, "monochrome", 0, "Monochrome video (no chroma planes)");
|
||||
static const arg_def_t full_still_picture_hdr = ARG_DEF(
|
||||
@@ -532,6 +532,12 @@ static const arg_def_t qm_max = ARG_DEF(
|
||||
NULL, "qm-max", 1, "Max quant matrix flatness (0..15), default is 15");
|
||||
static const arg_def_t reduced_tx_type_set = ARG_DEF(
|
||||
NULL, "reduced-tx-type-set", 1, "Use reduced set of transform types");
|
||||
static const arg_def_t use_intra_dct_only =
|
||||
ARG_DEF(NULL, "use-intra-dct-only", 1, "Use DCT only for INTRA modes");
|
||||
static const arg_def_t use_inter_dct_only =
|
||||
ARG_DEF(NULL, "use-inter-dct-only", 1, "Use DCT only for INTER modes");
|
||||
static const arg_def_t quant_b_adapt =
|
||||
ARG_DEF(NULL, "quant-b-adapt", 1, "Use adaptive quantize_b");
|
||||
#if CONFIG_DIST_8X8
|
||||
static const arg_def_t enable_dist_8x8 =
|
||||
ARG_DEF(NULL, "enable-dist-8x8", 1,
|
||||
@@ -602,6 +608,9 @@ static const arg_def_t max_gf_interval = ARG_DEF(
|
||||
static const arg_def_t gf_max_pyr_height =
|
||||
ARG_DEF(NULL, "gf-max-pyr-height", 1,
|
||||
"maximum height for GF group pyramid structure (1 to 4 (default))");
|
||||
static const arg_def_t max_reference_frames = ARG_DEF(
|
||||
NULL, "max-reference-frames", 1,
|
||||
"maximum number of reference frames allowed per frame (3 to 7 (default))");
|
||||
|
||||
static const struct arg_enum_list color_primaries_enum[] = {
|
||||
{ "bt709", AOM_CICP_CP_BT_709 },
|
||||
@@ -752,6 +761,9 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
|
||||
&qm_min,
|
||||
&qm_max,
|
||||
&reduced_tx_type_set,
|
||||
&use_intra_dct_only,
|
||||
&use_inter_dct_only,
|
||||
&quant_b_adapt,
|
||||
#if CONFIG_DIST_8X8
|
||||
&enable_dist_8x8,
|
||||
#endif
|
||||
@@ -779,7 +791,8 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
|
||||
#if CONFIG_DENOISE
|
||||
&denoise_noise_level,
|
||||
&denoise_block_size,
|
||||
#endif
|
||||
#endif // CONFIG_DENOISE
|
||||
&max_reference_frames,
|
||||
&enable_ref_frame_mvs,
|
||||
&bitdeptharg,
|
||||
&inbitdeptharg,
|
||||
@@ -834,6 +847,9 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
|
||||
AV1E_SET_QM_MIN,
|
||||
AV1E_SET_QM_MAX,
|
||||
AV1E_SET_REDUCED_TX_TYPE_SET,
|
||||
AV1E_SET_INTRA_DCT_ONLY,
|
||||
AV1E_SET_INTER_DCT_ONLY,
|
||||
AV1E_SET_QUANT_B_ADAPT,
|
||||
#if CONFIG_DIST_8X8
|
||||
AV1E_SET_ENABLE_DIST_8X8,
|
||||
#endif
|
||||
@@ -861,7 +877,8 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
|
||||
#if CONFIG_DENOISE
|
||||
AV1E_SET_DENOISE_NOISE_LEVEL,
|
||||
AV1E_SET_DENOISE_BLOCK_SIZE,
|
||||
#endif
|
||||
#endif // CONFIG_DENOISE
|
||||
AV1E_SET_MAX_REFERENCE_FRAMES,
|
||||
AV1E_SET_ENABLE_REF_FRAME_MVS,
|
||||
0 };
|
||||
#endif // CONFIG_AV1_ENCODER
|
||||
@@ -1340,6 +1357,7 @@ static int parse_stream_params(struct AvxEncoderConfig *global,
|
||||
config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
|
||||
} else if (arg_match(&arg, &large_scale_tile, argi)) {
|
||||
config->cfg.large_scale_tile = arg_parse_uint(&arg);
|
||||
if (config->cfg.large_scale_tile) global->codec = get_aom_lst_encoder();
|
||||
} else if (arg_match(&arg, &monochrome, argi)) {
|
||||
config->cfg.monochrome = 1;
|
||||
} else if (arg_match(&arg, &full_still_picture_hdr, argi)) {
|
||||
@@ -2097,6 +2115,10 @@ int main(int argc, const char **argv_) {
|
||||
FOREACH_STREAM(stream, streams) {
|
||||
check_encoder_config(global.disable_warning_prompt, &global,
|
||||
&stream->config.cfg);
|
||||
|
||||
// If large_scale_tile = 1, only support to output to ivf format.
|
||||
if (stream->config.cfg.large_scale_tile && !stream->config.write_ivf)
|
||||
die("only support ivf output format while large-scale-tile=1\n");
|
||||
}
|
||||
|
||||
/* Handle non-option arguments */
|
||||
|
||||
Vendored
+2
@@ -191,6 +191,8 @@ list(APPEND AOM_AV1_ENCODER_SOURCES
|
||||
"${AOM_ROOT}/av1/encoder/temporal_filter.h"
|
||||
"${AOM_ROOT}/av1/encoder/tokenize.c"
|
||||
"${AOM_ROOT}/av1/encoder/tokenize.h"
|
||||
"${AOM_ROOT}/av1/encoder/tpl_model.c"
|
||||
"${AOM_ROOT}/av1/encoder/tpl_model.h"
|
||||
"${AOM_ROOT}/av1/encoder/wedge_utils.c"
|
||||
"${AOM_ROOT}/third_party/fastfeat/fast.c"
|
||||
"${AOM_ROOT}/third_party/fastfeat/fast.h"
|
||||
|
||||
Vendored
+49
@@ -96,6 +96,7 @@ struct av1_extracfg {
|
||||
int enable_order_hint; // enable order hint for sequence
|
||||
int enable_tx64; // enable 64-pt transform usage for sequence
|
||||
int enable_dist_wtd_comp; // enable dist wtd compound for sequence
|
||||
int max_reference_frames; // maximum number of references per frame
|
||||
int enable_ref_frame_mvs; // sequence level
|
||||
int allow_ref_frame_mvs; // frame level
|
||||
int enable_masked_comp; // enable masked compound for sequence
|
||||
@@ -123,6 +124,9 @@ struct av1_extracfg {
|
||||
unsigned int chroma_subsampling_x;
|
||||
unsigned int chroma_subsampling_y;
|
||||
int reduced_tx_type_set;
|
||||
int use_intra_dct_only;
|
||||
int use_inter_dct_only;
|
||||
int quant_b_adapt;
|
||||
};
|
||||
|
||||
static struct av1_extracfg default_extra_cfg = {
|
||||
@@ -190,6 +194,7 @@ static struct av1_extracfg default_extra_cfg = {
|
||||
1, // frame order hint
|
||||
1, // enable 64-pt transform usage
|
||||
1, // dist-wtd compound
|
||||
7, // max_reference_frames
|
||||
1, // enable_ref_frame_mvs sequence level
|
||||
1, // allow ref_frame_mvs frame level
|
||||
1, // enable masked compound at sequence level
|
||||
@@ -216,6 +221,9 @@ static struct av1_extracfg default_extra_cfg = {
|
||||
0, // chroma_subsampling_x
|
||||
0, // chroma_subsampling_y
|
||||
0, // reduced_tx_type_set
|
||||
0, // use_intra_dct_only
|
||||
0, // use_inter_dct_only
|
||||
0, // quant_b_adapt
|
||||
};
|
||||
|
||||
struct aom_codec_alg_priv {
|
||||
@@ -419,6 +427,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
|
||||
#endif
|
||||
}
|
||||
|
||||
RANGE_CHECK(extra_cfg, max_reference_frames, 3, 7);
|
||||
RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1);
|
||||
RANGE_CHECK_HI(extra_cfg, chroma_subsampling_y, 1);
|
||||
|
||||
@@ -571,6 +580,9 @@ static aom_codec_err_t set_encoder_config(
|
||||
oxcf->qm_minlevel = extra_cfg->qm_min;
|
||||
oxcf->qm_maxlevel = extra_cfg->qm_max;
|
||||
oxcf->reduced_tx_type_set = extra_cfg->reduced_tx_type_set;
|
||||
oxcf->use_intra_dct_only = extra_cfg->use_intra_dct_only;
|
||||
oxcf->use_inter_dct_only = extra_cfg->use_inter_dct_only;
|
||||
oxcf->quant_b_adapt = extra_cfg->quant_b_adapt;
|
||||
#if CONFIG_DIST_8X8
|
||||
oxcf->using_dist_8x8 = extra_cfg->enable_dist_8x8;
|
||||
if (extra_cfg->tuning == AOM_TUNE_CDEF_DIST ||
|
||||
@@ -711,6 +723,11 @@ static aom_codec_err_t set_encoder_config(
|
||||
oxcf->enable_order_hint = extra_cfg->enable_order_hint;
|
||||
oxcf->enable_dist_wtd_comp =
|
||||
extra_cfg->enable_dist_wtd_comp & extra_cfg->enable_order_hint;
|
||||
oxcf->max_reference_frames = extra_cfg->max_reference_frames;
|
||||
if (oxcf->max_reference_frames > 3 && oxcf->max_reference_frames < 7) {
|
||||
// TODO(urvang): Enable all possible values, after they work properly.
|
||||
oxcf->max_reference_frames = 3;
|
||||
}
|
||||
oxcf->enable_masked_comp = extra_cfg->enable_masked_comp;
|
||||
oxcf->enable_diff_wtd_comp =
|
||||
extra_cfg->enable_masked_comp & extra_cfg->enable_diff_wtd_comp;
|
||||
@@ -1109,6 +1126,13 @@ static aom_codec_err_t ctrl_set_enable_dist_wtd_comp(aom_codec_alg_priv_t *ctx,
|
||||
return update_extra_cfg(ctx, &extra_cfg);
|
||||
}
|
||||
|
||||
// Control handler for AV1E_SET_MAX_REFERENCE_FRAMES.
// Copies the current extra configuration, overwrites max_reference_frames
// with the value read from `args`, and passes the copy to update_extra_cfg,
// whose result is returned. ctx->extra_cfg itself is not modified here.
static aom_codec_err_t ctrl_set_max_reference_frames(aom_codec_alg_priv_t *ctx,
                                                     va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.max_reference_frames = CAST(AV1E_SET_MAX_REFERENCE_FRAMES, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
|
||||
static aom_codec_err_t ctrl_set_enable_ref_frame_mvs(aom_codec_alg_priv_t *ctx,
|
||||
va_list args) {
|
||||
struct av1_extracfg extra_cfg = ctx->extra_cfg;
|
||||
@@ -1289,6 +1313,27 @@ static aom_codec_err_t ctrl_set_reduced_tx_type_set(aom_codec_alg_priv_t *ctx,
|
||||
return update_extra_cfg(ctx, &extra_cfg);
|
||||
}
|
||||
|
||||
// Control handler for AV1E_SET_INTRA_DCT_ONLY.
// Copies the current extra configuration, overwrites use_intra_dct_only with
// the value read from `args`, and passes the copy to update_extra_cfg, whose
// result is returned. ctx->extra_cfg itself is not modified here.
static aom_codec_err_t ctrl_set_intra_dct_only(aom_codec_alg_priv_t *ctx,
                                               va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.use_intra_dct_only = CAST(AV1E_SET_INTRA_DCT_ONLY, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
|
||||
// Control handler for AV1E_SET_INTER_DCT_ONLY.
// Copies the current extra configuration, overwrites use_inter_dct_only with
// the value read from `args`, and passes the copy to update_extra_cfg, whose
// result is returned. ctx->extra_cfg itself is not modified here.
static aom_codec_err_t ctrl_set_inter_dct_only(aom_codec_alg_priv_t *ctx,
                                               va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.use_inter_dct_only = CAST(AV1E_SET_INTER_DCT_ONLY, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
|
||||
// Control handler for AV1E_SET_QUANT_B_ADAPT.
// Copies the current extra configuration, overwrites quant_b_adapt with the
// value read from `args`, and passes the copy to update_extra_cfg, whose
// result is returned. ctx->extra_cfg itself is not modified here.
static aom_codec_err_t ctrl_set_quant_b_adapt(aom_codec_alg_priv_t *ctx,
                                              va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.quant_b_adapt = CAST(AV1E_SET_QUANT_B_ADAPT, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
|
||||
static aom_codec_err_t ctrl_set_film_grain_test_vector(
|
||||
aom_codec_alg_priv_t *ctx, va_list args) {
|
||||
struct av1_extracfg extra_cfg = ctx->extra_cfg;
|
||||
@@ -1987,6 +2032,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
|
||||
{ AV1E_SET_ENABLE_ORDER_HINT, ctrl_set_enable_order_hint },
|
||||
{ AV1E_SET_ENABLE_TX64, ctrl_set_enable_tx64 },
|
||||
{ AV1E_SET_ENABLE_DIST_WTD_COMP, ctrl_set_enable_dist_wtd_comp },
|
||||
{ AV1E_SET_MAX_REFERENCE_FRAMES, ctrl_set_max_reference_frames },
|
||||
{ AV1E_SET_ENABLE_REF_FRAME_MVS, ctrl_set_enable_ref_frame_mvs },
|
||||
{ AV1E_SET_ALLOW_REF_FRAME_MVS, ctrl_set_allow_ref_frame_mvs },
|
||||
{ AV1E_SET_ENABLE_MASKED_COMP, ctrl_set_enable_masked_comp },
|
||||
@@ -2008,6 +2054,9 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
|
||||
{ AV1E_SET_ENABLE_ANGLE_DELTA, ctrl_set_enable_angle_delta },
|
||||
{ AV1E_SET_AQ_MODE, ctrl_set_aq_mode },
|
||||
{ AV1E_SET_REDUCED_TX_TYPE_SET, ctrl_set_reduced_tx_type_set },
|
||||
{ AV1E_SET_INTRA_DCT_ONLY, ctrl_set_intra_dct_only },
|
||||
{ AV1E_SET_INTER_DCT_ONLY, ctrl_set_inter_dct_only },
|
||||
{ AV1E_SET_QUANT_B_ADAPT, ctrl_set_quant_b_adapt },
|
||||
{ AV1E_SET_DELTAQ_MODE, ctrl_set_deltaq_mode },
|
||||
{ AV1E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
|
||||
{ AV1E_SET_TUNE_CONTENT, ctrl_set_tune_content },
|
||||
|
||||
Vendored
+4
-2
@@ -243,8 +243,10 @@ typedef struct MB_MODE_INFO {
|
||||
// Joint sign of alpha Cb and alpha Cr
|
||||
int cfl_alpha_signs;
|
||||
|
||||
int compound_idx;
|
||||
// Indicate if masked compound is used(1) or not(0).
|
||||
int comp_group_idx;
|
||||
// If comp_group_idx=0, indicate if dist_wtd_comp(0) or avg_comp(1) is used.
|
||||
int compound_idx;
|
||||
#if CONFIG_INSPECTION
|
||||
int16_t tx_skip[TXK_TYPE_BUF_LEN];
|
||||
#endif
|
||||
@@ -596,7 +598,7 @@ typedef struct macroblockd {
|
||||
uint8_t *tmp_obmc_bufs[2];
|
||||
} MACROBLOCKD;
|
||||
|
||||
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
|
||||
static INLINE int is_cur_buf_hbd(const MACROBLOCKD *xd) {
|
||||
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
Vendored
+5
-6
@@ -37,7 +37,7 @@ void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
|
||||
assert(pred_plane < CFL_PRED_PLANES);
|
||||
assert(width <= CFL_BUF_LINE);
|
||||
|
||||
if (get_bitdepth_data_path_index(xd)) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input);
|
||||
memcpy(xd->cfl.dc_pred_cache[pred_plane], input_16, width << 1);
|
||||
return;
|
||||
@@ -69,7 +69,7 @@ void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
|
||||
assert(pred_plane < CFL_PRED_PLANES);
|
||||
assert(width <= CFL_BUF_LINE);
|
||||
assert(height <= CFL_BUF_LINE);
|
||||
if (get_bitdepth_data_path_index(xd)) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
|
||||
cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride,
|
||||
width, height);
|
||||
@@ -196,7 +196,7 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
|
||||
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
|
||||
assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <=
|
||||
CFL_BUF_SQUARE);
|
||||
if (get_bitdepth_data_path_index(xd)) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
|
||||
get_predict_hbd_fn(tx_size)(cfl->ac_buf_q3, dst_16, dst_stride, alpha_q3,
|
||||
xd->bd);
|
||||
@@ -388,8 +388,7 @@ void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
|
||||
assert(!((row & 1) && tx_size_high[tx_size] != 4));
|
||||
sub8x8_adjust_offset(cfl, &row, &col);
|
||||
}
|
||||
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size,
|
||||
get_bitdepth_data_path_index(xd));
|
||||
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size, is_cur_buf_hbd(xd));
|
||||
}
|
||||
|
||||
void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
|
||||
@@ -405,5 +404,5 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
|
||||
const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
|
||||
tx_size = get_tx_size(width, height);
|
||||
cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size,
|
||||
get_bitdepth_data_path_index(xd));
|
||||
is_cur_buf_hbd(xd));
|
||||
}
|
||||
|
||||
Vendored
+1
-1
@@ -204,7 +204,7 @@ static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
|
||||
txfm_param->eob = eob;
|
||||
txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id];
|
||||
txfm_param->bd = xd->bd;
|
||||
txfm_param->is_hbd = get_bitdepth_data_path_index(xd);
|
||||
txfm_param->is_hbd = is_cur_buf_hbd(xd);
|
||||
txfm_param->tx_set_type = av1_get_ext_tx_set_type(
|
||||
txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
|
||||
}
|
||||
|
||||
+2
-2
@@ -378,7 +378,6 @@ typedef struct AV1Common {
|
||||
int show_frame;
|
||||
int showable_frame; // frame can be used as show existing frame in future
|
||||
int show_existing_frame;
|
||||
int reset_decoder_state;
|
||||
|
||||
uint8_t disable_cdf_update;
|
||||
int allow_high_precision_mv;
|
||||
@@ -432,6 +431,7 @@ typedef struct AV1Common {
|
||||
int qm_v;
|
||||
int min_qmlevel;
|
||||
int max_qmlevel;
|
||||
int use_quant_b_adapt;
|
||||
|
||||
/* We allocate a MB_MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
@@ -501,7 +501,6 @@ typedef struct AV1Common {
|
||||
int primary_ref_frame;
|
||||
|
||||
int error_resilient_mode;
|
||||
int force_primary_ref_none;
|
||||
|
||||
int tile_cols, tile_rows;
|
||||
|
||||
@@ -642,6 +641,7 @@ static INLINE RefCntBuffer *assign_cur_frame_new_fb(AV1_COMMON *const cm) {
|
||||
if (new_fb_idx == INVALID_IDX) return NULL;
|
||||
|
||||
cm->cur_frame = &cm->buffer_pool->frame_bufs[new_fb_idx];
|
||||
cm->cur_frame->buf.buf_8bit_valid = 0;
|
||||
return cm->cur_frame;
|
||||
}
|
||||
|
||||
|
||||
+14
-60
@@ -84,12 +84,11 @@ void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
if (do_warp && xd->cur_frame_force_integer_mv == 0) {
|
||||
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const struct buf_2d *const pre_buf = &pd->pre[ref];
|
||||
av1_warp_plane(&final_warp_params,
|
||||
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
|
||||
av1_warp_plane(&final_warp_params, is_cur_buf_hbd(xd), xd->bd,
|
||||
pre_buf->buf0, pre_buf->width, pre_buf->height,
|
||||
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
|
||||
pd->subsampling_x, pd->subsampling_y, conv_params);
|
||||
} else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
} else if (is_cur_buf_hbd(xd)) {
|
||||
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
|
||||
w, h, conv_params, interp_filters, is_intrabc,
|
||||
xd->bd);
|
||||
@@ -568,14 +567,15 @@ static void build_masked_compound_no_round(
|
||||
const int subh = (2 << mi_size_high_log2[sb_type]) == h;
|
||||
const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
|
||||
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
|
||||
src1_stride, mask, block_size_wide[sb_type],
|
||||
w, h, subw, subh, conv_params, xd->bd);
|
||||
else
|
||||
} else {
|
||||
aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
|
||||
src1_stride, mask, block_size_wide[sb_type], w,
|
||||
h, subw, subh, conv_params);
|
||||
}
|
||||
}
|
||||
|
||||
void av1_make_masked_inter_predictor(
|
||||
@@ -800,53 +800,6 @@ void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct obmc_check_mv_field_ctxt {
|
||||
MB_MODE_INFO *current_mi;
|
||||
int mv_field_check_result;
|
||||
};
|
||||
|
||||
static INLINE void obmc_check_identical_mv(MACROBLOCKD *xd, int rel_mi_col,
|
||||
uint8_t nb_mi_width,
|
||||
MB_MODE_INFO *nb_mi, void *fun_ctxt,
|
||||
const int num_planes) {
|
||||
(void)xd;
|
||||
(void)rel_mi_col;
|
||||
(void)nb_mi_width;
|
||||
(void)num_planes;
|
||||
struct obmc_check_mv_field_ctxt *ctxt =
|
||||
(struct obmc_check_mv_field_ctxt *)fun_ctxt;
|
||||
const MB_MODE_INFO *current_mi = ctxt->current_mi;
|
||||
|
||||
if (ctxt->mv_field_check_result == 0) return;
|
||||
|
||||
if (nb_mi->ref_frame[0] != current_mi->ref_frame[0] ||
|
||||
nb_mi->mv[0].as_int != current_mi->mv[0].as_int ||
|
||||
nb_mi->interp_filters != current_mi->interp_filters) {
|
||||
ctxt->mv_field_check_result = 0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if the neighbors' motions used by obmc have same parameters as for
|
||||
// the current block. If all the parameters are identical, obmc will produce
|
||||
// the same prediction as from regular bmc, therefore we can skip the
|
||||
// overlapping operations for less complexity. The parameters checked include
|
||||
// reference frame, motion vector, and interpolation filter.
|
||||
// Returns 1 when no visited above/left neighbor differs from the current
// block (xd->mi[0]) in those parameters, and 0 as soon as one does.
int av1_check_identical_obmc_mv_field(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col) {
|
||||
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
|
||||
// Start from "identical" (1); the callback only ever clears the flag.
struct obmc_check_mv_field_ctxt mv_field_check_ctxt = { xd->mi[0], 1 };
|
||||
|
||||
// Visit the above neighbors; the limit passed to the iterator is looked up
// in max_neighbor_obmc using mi_size_wide_log2[bsize].
foreach_overlappable_nb_above(cm, xd, mi_col,
|
||||
max_neighbor_obmc[mi_size_wide_log2[bsize]],
|
||||
obmc_check_identical_mv, &mv_field_check_ctxt);
|
||||
// Same for the left neighbors, using mi_size_high_log2[bsize].
foreach_overlappable_nb_left(cm, xd, mi_row,
|
||||
max_neighbor_obmc[mi_size_high_log2[bsize]],
|
||||
obmc_check_identical_mv, &mv_field_check_ctxt);
|
||||
|
||||
return mv_field_check_ctxt.mv_field_check_result;
|
||||
}
|
||||
|
||||
struct obmc_inter_pred_ctxt {
|
||||
uint8_t **adjacent;
|
||||
int *adjacent_stride;
|
||||
@@ -860,7 +813,7 @@ static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
|
||||
(void)above_mi;
|
||||
struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
|
||||
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
|
||||
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
|
||||
const int is_hbd = is_cur_buf_hbd(xd);
|
||||
const int overlap =
|
||||
AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
|
||||
|
||||
@@ -897,7 +850,7 @@ static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
|
||||
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
|
||||
const int overlap =
|
||||
AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
|
||||
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
|
||||
const int is_hbd = is_cur_buf_hbd(xd);
|
||||
|
||||
for (int plane = 0; plane < num_planes; ++plane) {
|
||||
const struct macroblockd_plane *pd = &xd->plane[plane];
|
||||
@@ -1142,8 +1095,8 @@ static void combine_interintra_highbd(
|
||||
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd,
|
||||
BLOCK_SIZE bsize, int plane,
|
||||
BUFFER_SET *ctx, uint8_t *dst,
|
||||
int dst_stride) {
|
||||
const BUFFER_SET *ctx,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const int ssx = xd->plane[plane].subsampling_x;
|
||||
const int ssy = xd->plane[plane].subsampling_y;
|
||||
@@ -1166,7 +1119,7 @@ void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
|
||||
const int ssx = xd->plane[plane].subsampling_x;
|
||||
const int ssy = xd->plane[plane].subsampling_y;
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
combine_interintra_highbd(
|
||||
xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
|
||||
xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
|
||||
@@ -1185,9 +1138,9 @@ void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
|
||||
// build interintra_predictors for one plane
|
||||
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
uint8_t *pred, int stride,
|
||||
BUFFER_SET *ctx, int plane,
|
||||
const BUFFER_SET *ctx, int plane,
|
||||
BLOCK_SIZE bsize) {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
|
||||
av1_build_intra_predictors_for_interintra(
|
||||
cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
|
||||
@@ -1206,7 +1159,8 @@ void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
uint8_t *upred, uint8_t *vpred,
|
||||
int ustride, int vstride,
|
||||
BUFFER_SET *ctx, BLOCK_SIZE bsize) {
|
||||
const BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
|
||||
av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
|
||||
}
|
||||
|
||||
+4
-5
@@ -161,8 +161,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
|
||||
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi);
|
||||
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane *pd, int dir);
|
||||
int av1_check_identical_obmc_mv_field(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col);
|
||||
|
||||
static INLINE int is_interinter_compound_used(COMPOUND_TYPE type,
|
||||
BLOCK_SIZE sb_type) {
|
||||
@@ -335,17 +333,18 @@ const uint8_t *av1_get_compound_type_mask(
|
||||
// build interintra_predictors for one plane
|
||||
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
uint8_t *pred, int stride,
|
||||
BUFFER_SET *ctx, int plane,
|
||||
const BUFFER_SET *ctx, int plane,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
uint8_t *upred, uint8_t *vpred,
|
||||
int ustride, int vstride,
|
||||
BUFFER_SET *ctx, BLOCK_SIZE bsize);
|
||||
const BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void av1_build_intra_predictors_for_interintra(
|
||||
const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
|
||||
BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
|
||||
const BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
|
||||
|
||||
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
|
||||
const uint8_t *inter_pred, int inter_stride,
|
||||
|
||||
+2
-2
@@ -1510,7 +1510,7 @@ void av1_predict_intra_block(
|
||||
xd->color_index_map_offset[plane != 0];
|
||||
const uint16_t *const palette =
|
||||
mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
|
||||
for (r = 0; r < txhpx; ++r) {
|
||||
for (c = 0; c < txwpx; ++c) {
|
||||
@@ -1569,7 +1569,7 @@ void av1_predict_intra_block(
|
||||
tx_size, row_off, col_off, pd->subsampling_x, pd->subsampling_y);
|
||||
|
||||
const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
build_intra_predictors_high(
|
||||
xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
|
||||
filter_intra_mode, tx_size, disable_edge_filter,
|
||||
|
||||
+54
-17
@@ -64,6 +64,9 @@
|
||||
|
||||
#define ACCT_STR __func__
|
||||
|
||||
// Lower and upper bounds on the number of row-MT worker threads counted for a
// single tile; selected in get_max_row_mt_workers_per_tile().
#define AOM_MIN_THREADS_PER_TILE 1
|
||||
#define AOM_MAX_THREADS_PER_TILE 2
|
||||
|
||||
// This is needed by ext_tile related unit tests.
|
||||
#define EXT_TILE_DEBUG 1
|
||||
#define MC_TEMP_BUF_PELS \
|
||||
@@ -734,7 +737,7 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm,
|
||||
&scaled_mv, &subpel_x_mv, &subpel_y_mv);
|
||||
pre = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
|
||||
src_stride = pre_buf->stride;
|
||||
highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
|
||||
highbd = is_cur_buf_hbd(xd);
|
||||
extend_mc_border(sf, pre_buf, scaled_mv, block, subpel_x_mv,
|
||||
subpel_y_mv, 0, is_intrabc, highbd, xd->mc_buf[ref],
|
||||
&pre, &src_stride);
|
||||
@@ -780,7 +783,7 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm,
|
||||
&scaled_mv, &subpel_x_mv, &subpel_y_mv);
|
||||
pre[ref] = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
|
||||
src_stride[ref] = pre_buf->stride;
|
||||
highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
|
||||
highbd = is_cur_buf_hbd(xd);
|
||||
|
||||
WarpTypesAllowed warp_types;
|
||||
warp_types.global_warp_allowed = is_global[ref];
|
||||
@@ -853,7 +856,7 @@ static void dec_build_inter_predictors_for_planes(const AV1_COMMON *cm,
|
||||
|
||||
static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd, int mi_row,
|
||||
int mi_col, BUFFER_SET *ctx,
|
||||
int mi_col, const BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
|
||||
|
||||
@@ -868,7 +871,7 @@ static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
|
||||
|
||||
static void dec_build_inter_predictors_sbuv(const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd, int mi_row,
|
||||
int mi_col, BUFFER_SET *ctx,
|
||||
int mi_col, const BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
|
||||
MAX_MB_PLANE - 1);
|
||||
@@ -1013,7 +1016,7 @@ static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm,
|
||||
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
|
||||
int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
int len = sizeof(uint16_t);
|
||||
dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
|
||||
dst_buf1[1] =
|
||||
@@ -3361,6 +3364,20 @@ static int tile_worker_hook(void *arg1, void *arg2) {
|
||||
return !td->xd.corrupted;
|
||||
}
|
||||
|
||||
// Returns the maximum number of row-MT workers to count for one tile:
// AOM_MIN_THREADS_PER_TILE when av1_get_sb_rows_in_tile() reports exactly one
// superblock row for the tile, otherwise AOM_MAX_THREADS_PER_TILE.
static INLINE int get_max_row_mt_workers_per_tile(AV1_COMMON *cm,
|
||||
TileInfo tile) {
|
||||
// NOTE: Currently the value of max workers is calculated based
|
||||
// on the parse and decode time. As per the theoretical estimate,
|
||||
// when the percentage of parse time is equal to the percentage of decode
|
||||
// time, the number of workers needed to parse + decode a tile cannot
|
||||
// exceed 2.
|
||||
// TODO(any): Modify this value if parsing is optimized in future.
|
||||
int sb_rows = av1_get_sb_rows_in_tile(cm, tile);
|
||||
int max_workers =
|
||||
sb_rows == 1 ? AOM_MIN_THREADS_PER_TILE : AOM_MAX_THREADS_PER_TILE;
|
||||
return max_workers;
|
||||
}
|
||||
|
||||
// The caller must hold pbi->row_mt_mutex_ when calling this function.
|
||||
// Returns 1 if either the next job is stored in *next_job_info or 1 is stored
|
||||
// in *end_of_frame.
|
||||
@@ -3391,8 +3408,8 @@ static int get_next_job_info(AV1Decoder *const pbi,
|
||||
int min_threads_working = INT_MAX;
|
||||
int max_mis_to_decode = 0;
|
||||
int tile_row_idx, tile_col_idx;
|
||||
int tile_row = 0;
|
||||
int tile_col = 0;
|
||||
int tile_row = -1;
|
||||
int tile_col = -1;
|
||||
|
||||
memset(next_job_info, 0, sizeof(*next_job_info));
|
||||
|
||||
@@ -3440,7 +3457,9 @@ static int get_next_job_info(AV1Decoder *const pbi,
|
||||
max_mis_to_decode = 0;
|
||||
}
|
||||
if (num_threads_working == min_threads_working &&
|
||||
num_mis_to_decode > max_mis_to_decode) {
|
||||
num_mis_to_decode > max_mis_to_decode &&
|
||||
num_threads_working <
|
||||
get_max_row_mt_workers_per_tile(cm, tile_data->tile_info)) {
|
||||
max_mis_to_decode = num_mis_to_decode;
|
||||
tile_row = tile_row_idx;
|
||||
tile_col = tile_col_idx;
|
||||
@@ -3448,6 +3467,8 @@ static int get_next_job_info(AV1Decoder *const pbi,
|
||||
}
|
||||
}
|
||||
}
|
||||
// No job found to process
|
||||
if (tile_row == -1 || tile_col == -1) return 0;
|
||||
|
||||
tile_data = pbi->tile_data + tile_row * cm->tile_cols + tile_col;
|
||||
tile_info = tile_data->tile_info;
|
||||
@@ -3576,9 +3597,22 @@ static int row_mt_worker_hook(void *arg1, void *arg2) {
|
||||
TileDataDec *const tile_data = cur_job_info->tile_data;
|
||||
tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data,
|
||||
allow_update_cdf);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_lock(pbi->row_mt_mutex_);
|
||||
#endif
|
||||
tile_data->dec_row_mt_sync.num_threads_working++;
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_unlock(pbi->row_mt_mutex_);
|
||||
#endif
|
||||
// decode tile
|
||||
parse_tile_row_mt(pbi, td, tile_data);
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_lock(pbi->row_mt_mutex_);
|
||||
#endif
|
||||
tile_data->dec_row_mt_sync.num_threads_working--;
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_unlock(pbi->row_mt_mutex_);
|
||||
#endif
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@@ -4055,7 +4089,8 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
|
||||
int tile_cols_start;
|
||||
int tile_cols_end;
|
||||
int tile_count_tg;
|
||||
int num_workers;
|
||||
int num_workers = 0;
|
||||
int max_threads;
|
||||
const uint8_t *raw_data_end = NULL;
|
||||
int max_sb_rows = 0;
|
||||
|
||||
@@ -4071,7 +4106,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
|
||||
tile_cols_end = tile_cols;
|
||||
}
|
||||
tile_count_tg = end_tile - start_tile + 1;
|
||||
num_workers = pbi->max_threads;
|
||||
max_threads = pbi->max_threads;
|
||||
|
||||
// No tiles to decode.
|
||||
if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start ||
|
||||
@@ -4084,7 +4119,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
|
||||
assert(tile_rows <= MAX_TILE_ROWS);
|
||||
assert(tile_cols <= MAX_TILE_COLS);
|
||||
assert(tile_count_tg > 0);
|
||||
assert(num_workers > 0);
|
||||
assert(max_threads > 0);
|
||||
assert(start_tile <= end_tile);
|
||||
assert(start_tile >= 0 && end_tile < n_tiles);
|
||||
|
||||
@@ -4116,8 +4151,10 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
|
||||
|
||||
max_sb_rows = AOMMAX(max_sb_rows,
|
||||
av1_get_sb_rows_in_tile(cm, tile_data->tile_info));
|
||||
num_workers += get_max_row_mt_workers_per_tile(cm, tile_data->tile_info);
|
||||
}
|
||||
}
|
||||
num_workers = AOMMIN(num_workers, max_threads);
|
||||
|
||||
if (pbi->allocated_row_mt_sync_rows != max_sb_rows) {
|
||||
for (int i = 0; i < n_tiles; ++i) {
|
||||
@@ -4817,7 +4854,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
|
||||
cm->error_resilient_mode = 1;
|
||||
} else {
|
||||
cm->show_existing_frame = aom_rb_read_bit(rb);
|
||||
cm->reset_decoder_state = 0;
|
||||
pbi->reset_decoder_state = 0;
|
||||
|
||||
if (cm->show_existing_frame) {
|
||||
if (pbi->sequence_header_changed) {
|
||||
@@ -4859,7 +4896,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
|
||||
// assign_frame_buffer_p()!
|
||||
assert(!cm->cur_frame->raw_frame_buffer.data);
|
||||
assign_frame_buffer_p(&cm->cur_frame, frame_to_show);
|
||||
cm->reset_decoder_state = frame_to_show->frame_type == KEY_FRAME;
|
||||
pbi->reset_decoder_state = frame_to_show->frame_type == KEY_FRAME;
|
||||
unlock_buffer_pool(pool);
|
||||
|
||||
cm->lf.filter_level[0] = 0;
|
||||
@@ -4869,11 +4906,11 @@ static int read_uncompressed_header(AV1Decoder *pbi,
|
||||
if (!frame_to_show->showable_frame) {
|
||||
aom_merge_corrupted_flag(&xd->corrupted, 1);
|
||||
}
|
||||
if (cm->reset_decoder_state) frame_to_show->showable_frame = 0;
|
||||
if (pbi->reset_decoder_state) frame_to_show->showable_frame = 0;
|
||||
|
||||
cm->film_grain_params = frame_to_show->film_grain_params;
|
||||
|
||||
if (cm->reset_decoder_state) {
|
||||
if (pbi->reset_decoder_state) {
|
||||
show_existing_frame_reset(pbi, existing_frame_idx);
|
||||
} else {
|
||||
current_frame->refresh_frame_flags = 0;
|
||||
@@ -5471,7 +5508,7 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
|
||||
if (cm->show_existing_frame) {
|
||||
// showing a frame directly
|
||||
*p_data_end = data + uncomp_hdr_size;
|
||||
if (cm->reset_decoder_state) {
|
||||
if (pbi->reset_decoder_state) {
|
||||
// Use the default frame context values.
|
||||
*cm->fc = *cm->default_frame_context;
|
||||
if (!cm->fc->initialized)
|
||||
|
||||
+2
-1
@@ -699,7 +699,8 @@ static void read_intrabc_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
mi_col, bsize, r);
|
||||
if (!valid_dv) {
|
||||
// Intra bc motion vectors are not valid - signal corrupt frame
|
||||
aom_merge_corrupted_flag(&xd->corrupted, 1);
|
||||
aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
|
||||
"Invalid intrabc dv");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -361,7 +361,7 @@ static void swap_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
|
||||
assert(IMPLIES(!pbi->hold_ref_buf,
|
||||
cm->current_frame.refresh_frame_flags == 0));
|
||||
assert(IMPLIES(!pbi->hold_ref_buf,
|
||||
cm->show_existing_frame && !cm->reset_decoder_state));
|
||||
cm->show_existing_frame && !pbi->reset_decoder_state));
|
||||
|
||||
// The following two for loops need to release the reference stored in
|
||||
// cm->ref_frame_map[ref_index] before transferring the reference stored
|
||||
@@ -374,7 +374,7 @@ static void swap_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
|
||||
}
|
||||
|
||||
const int check_on_show_existing_frame =
|
||||
!cm->show_existing_frame || cm->reset_decoder_state;
|
||||
!cm->show_existing_frame || pbi->reset_decoder_state;
|
||||
for (; ref_index < REF_FRAMES && check_on_show_existing_frame;
|
||||
++ref_index) {
|
||||
decrease_ref_count(cm->ref_frame_map[ref_index], pool);
|
||||
|
||||
+1
@@ -200,6 +200,7 @@ typedef struct AV1Decoder {
|
||||
int need_resync; // wait for key/intra-only frame.
|
||||
int hold_ref_buf; // Boolean: whether we are holding reference buffers in
|
||||
// common.next_ref_frame_map.
|
||||
int reset_decoder_state;
|
||||
|
||||
int tile_size_bytes;
|
||||
int tile_col_size_bytes;
|
||||
|
||||
+7
-4
@@ -155,9 +155,6 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
memset(levels_buf, 0,
|
||||
sizeof(*levels_buf) *
|
||||
((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
|
||||
if (plane == AOM_PLANE_Y) {
|
||||
// only y plane's tx_type is transmitted
|
||||
av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r);
|
||||
@@ -241,6 +238,12 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
}
|
||||
*eob = rec_eob_pos(eob_pt, eob_extra);
|
||||
|
||||
if (*eob > 1) {
|
||||
memset(levels_buf, 0,
|
||||
sizeof(*levels_buf) *
|
||||
((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
|
||||
}
|
||||
|
||||
{
|
||||
// Read the non-zero coefficient with scan index eob-1
|
||||
// TODO(angiebird): Put this into a function
|
||||
@@ -252,7 +255,7 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx];
|
||||
int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + 1;
|
||||
if (level > NUM_BASE_LEVELS) {
|
||||
const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
|
||||
const int br_ctx = get_br_ctx_eob(pos, bwl, tx_class);
|
||||
cdf = ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx];
|
||||
for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
|
||||
const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
|
||||
|
||||
+2
-2
@@ -121,7 +121,7 @@ int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
|
||||
|
||||
for (i = 0; i < bh; i += 4) {
|
||||
for (j = 0; j < bw; j += 4) {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
var +=
|
||||
log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
|
||||
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
|
||||
@@ -153,7 +153,7 @@ static unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
|
||||
uint8_t *buf = x->plane[0].src.buf;
|
||||
const int bw = MI_SIZE * mi_size_wide[bs];
|
||||
const int bh = MI_SIZE * mi_size_high[bs];
|
||||
int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
|
||||
const int hbd = is_cur_buf_hbd(xd);
|
||||
|
||||
int var = 0;
|
||||
for (int r = 0; r < bh; r += 8)
|
||||
|
||||
+147
-84
@@ -41,47 +41,37 @@ static void quantize_fp_helper_c(
|
||||
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
|
||||
const qm_val_t *iqm_ptr, int log_scale) {
|
||||
int i, eob = -1;
|
||||
const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
|
||||
ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
|
||||
// TODO(jingning) Decide the need of these arguments after the
|
||||
// quantization process is completed.
|
||||
(void)zbin_ptr;
|
||||
(void)quant_shift_ptr;
|
||||
(void)iscan;
|
||||
|
||||
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
|
||||
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
|
||||
|
||||
if (qm_ptr == NULL && iqm_ptr == NULL) {
|
||||
const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
|
||||
{ // rc == 0
|
||||
const int coeff = coeff_ptr[0];
|
||||
for (i = 0; i < n_coeffs; i++) {
|
||||
const int rc = scan[i];
|
||||
const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
|
||||
const int coeff = coeff_ptr[rc];
|
||||
const int coeff_sign = (coeff >> 31);
|
||||
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
if ((abs_coeff << (1 + log_scale)) >= (int32_t)(dequant_ptr[0])) {
|
||||
abs_coeff = clamp64(abs_coeff + rounding0, INT16_MIN, INT16_MAX);
|
||||
const int tmp32 = (int)((abs_coeff * quant_ptr[0]) >> (16 - log_scale));
|
||||
int tmp32 = 0;
|
||||
if ((abs_coeff << (1 + log_scale)) >= thresh) {
|
||||
abs_coeff =
|
||||
clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
|
||||
tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
|
||||
if (tmp32) {
|
||||
qcoeff_ptr[0] = (tmp32 ^ coeff_sign) - coeff_sign;
|
||||
const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[0]) >> log_scale;
|
||||
dqcoeff_ptr[0] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
|
||||
eob = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
const int rounding1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
|
||||
const int32_t thresh1 = (int32_t)(dequant_ptr[1]);
|
||||
for (i = 1; i < n_coeffs; i++) {
|
||||
const int coeff = coeff_ptr[i];
|
||||
const int coeff_sign = (coeff >> 31);
|
||||
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
if ((abs_coeff << (1 + log_scale)) >= thresh1) {
|
||||
abs_coeff = clamp64(abs_coeff + rounding1, INT16_MIN, INT16_MAX);
|
||||
const int tmp32 = (int)((abs_coeff * quant_ptr[1]) >> (16 - log_scale));
|
||||
if (tmp32) {
|
||||
qcoeff_ptr[i] = (tmp32 ^ coeff_sign) - coeff_sign;
|
||||
const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[1]) >> log_scale;
|
||||
dqcoeff_ptr[i] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
|
||||
eob = AOMMAX(iscan[i], eob);
|
||||
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
|
||||
const tran_low_t abs_dqcoeff =
|
||||
(tmp32 * dequant_ptr[rc != 0]) >> log_scale;
|
||||
dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
|
||||
}
|
||||
}
|
||||
if (tmp32) eob = i;
|
||||
}
|
||||
} else {
|
||||
// Quantization pass: All coefficients with index >= zero_flag are
|
||||
@@ -99,7 +89,7 @@ static void quantize_fp_helper_c(
|
||||
int tmp32 = 0;
|
||||
if (abs_coeff * wt >=
|
||||
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
|
||||
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
|
||||
abs_coeff += rounding[rc != 0];
|
||||
abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
|
||||
tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
|
||||
(16 - log_scale + AOM_QM_BITS));
|
||||
@@ -275,32 +265,65 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
|
||||
const qm_val_t *qm_ptr = qparam->qmatrix;
|
||||
const qm_val_t *iqm_ptr = qparam->iqmatrix;
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
if (qparam->use_quant_b_adapt) {
|
||||
// TODO(sarahparker) These quantize_b optimizations need SIMD
|
||||
// implementations
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
quantize_b_adaptive_helper_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
|
||||
sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
aom_quantize_b_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
case 1:
|
||||
aom_quantize_b_32x32_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_quantize_b_64x64_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
case 1:
|
||||
aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
case 1:
|
||||
aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -391,41 +414,81 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
|
||||
const QUANT_PARAM *qparam) {
|
||||
const qm_val_t *qm_ptr = qparam->qmatrix;
|
||||
const qm_val_t *iqm_ptr = qparam->iqmatrix;
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
if (qparam->use_quant_b_adapt) {
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
highbd_quantize_b_adaptive_helper_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
|
||||
sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
if (LIKELY(n_coeffs >= 8)) {
|
||||
aom_highbd_quantize_b_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
} else {
|
||||
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
|
||||
// quantization
|
||||
aom_highbd_quantize_b_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
aom_highbd_quantize_b_32x32_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_highbd_quantize_b_64x64_adaptive_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
if (LIKELY(n_coeffs >= 8)) {
|
||||
aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan);
|
||||
} else {
|
||||
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
|
||||
// quantization
|
||||
aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX,
|
||||
if (qm_ptr != NULL && iqm_ptr != NULL) {
|
||||
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
|
||||
p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
|
||||
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
|
||||
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
|
||||
} else {
|
||||
switch (qparam->log_scale) {
|
||||
case 0:
|
||||
if (LIKELY(n_coeffs >= 8)) {
|
||||
aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX,
|
||||
p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
|
||||
p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
aom_highbd_quantize_b_32x32(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_highbd_quantize_b_64x64(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
} else {
|
||||
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
|
||||
// quantization
|
||||
aom_highbd_quantize_b_c(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
aom_highbd_quantize_b_32x32(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
case 2:
|
||||
aom_highbd_quantize_b_64x64(
|
||||
coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
|
||||
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
|
||||
eob_ptr, sc->scan, sc->iscan);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+1
@@ -27,6 +27,7 @@ typedef struct QUANT_PARAM {
|
||||
TX_SIZE tx_size;
|
||||
const qm_val_t *qmatrix;
|
||||
const qm_val_t *iqmatrix;
|
||||
int use_quant_b_adapt;
|
||||
} QUANT_PARAM;
|
||||
|
||||
typedef void (*AV1_QUANT_FACADE)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
|
||||
Vendored
+3
-7
@@ -54,10 +54,10 @@ typedef struct macroblock_plane {
|
||||
typedef struct {
|
||||
int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
|
||||
int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
|
||||
int base_cost[SIG_COEF_CONTEXTS][4];
|
||||
int base_cost[SIG_COEF_CONTEXTS][8];
|
||||
int eob_extra_cost[EOB_COEF_CONTEXTS][2];
|
||||
int dc_sign_cost[DC_SIGN_CONTEXTS][2];
|
||||
int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
|
||||
int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
|
||||
} LV_MAP_COEFF_COST;
|
||||
|
||||
typedef struct {
|
||||
@@ -192,16 +192,14 @@ typedef struct {
|
||||
int32_t rate[COMPOUND_TYPES];
|
||||
int64_t dist[COMPOUND_TYPES];
|
||||
int_mv mv[2];
|
||||
int8_t ref_frames[2];
|
||||
MV_REFERENCE_FRAME ref_frames[2];
|
||||
PREDICTION_MODE mode;
|
||||
InterpFilters filter;
|
||||
int ref_mv_idx;
|
||||
int is_global[2];
|
||||
} COMP_RD_STATS;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
struct inter_modes_info;
|
||||
#endif
|
||||
typedef struct macroblock MACROBLOCK;
|
||||
struct macroblock {
|
||||
struct macroblock_plane plane[MAX_MB_PLANE];
|
||||
@@ -300,9 +298,7 @@ struct macroblock {
|
||||
// to the accurate tile context.
|
||||
FRAME_CONTEXT *tile_pb_ctx;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
struct inter_modes_info *inter_modes_info;
|
||||
#endif
|
||||
|
||||
// buffer for hash value calculation of a block
|
||||
// used only in av1_get_block_hash_value()
|
||||
|
||||
Vendored
+4
@@ -30,6 +30,10 @@ extern const uint16_t av1_prob_cost[128];
|
||||
|
||||
// Calculate the cost of a symbol with probability p15 / 2^15
|
||||
static INLINE int av1_cost_symbol(aom_cdf_prob p15) {
|
||||
// p15 can be out of range [1, CDF_PROB_TOP - 1]. Clamping it, so that the
|
||||
// following cost calculation works correctly. Otherwise, if p15 =
|
||||
// CDF_PROB_TOP, shift would be -1, and "p15 << shift" would be wrong.
|
||||
p15 = (aom_cdf_prob)clamp(p15, 1, CDF_PROB_TOP - 1);
|
||||
assert(0 < p15 && p15 < CDF_PROB_TOP);
|
||||
const int shift = CDF_PROB_BITS - 1 - get_msb(p15);
|
||||
const int prob = get_prob(p15 << shift, CDF_PROB_TOP);
|
||||
|
||||
+772
-13
@@ -11,12 +11,244 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/aom_scale_rtcd.h"
|
||||
|
||||
#include "aom/aom_codec.h"
|
||||
#include "aom/aom_encoder.h"
|
||||
|
||||
#include "aom_ports/system_state.h"
|
||||
|
||||
#if CONFIG_MISMATCH_DEBUG
|
||||
#include "aom_util/debug_util.h"
|
||||
#endif // CONFIG_MISMATCH_DEBUG
|
||||
|
||||
#include "av1/common/onyxc_int.h"
|
||||
|
||||
#include "av1/encoder/encoder.h"
|
||||
#include "av1/encoder/encode_strategy.h"
|
||||
#include "av1/encoder/firstpass.h"
|
||||
#include "av1/encoder/temporal_filter.h"
|
||||
#include "av1/encoder/tpl_model.h"
|
||||
|
||||
// Define the reference buffers that will be updated post encode.
|
||||
void av1_configure_buffer_updates(AV1_COMP *cpi, const FRAME_UPDATE_TYPE type) {
|
||||
// NOTE(weitinglin): Should we define another function to take care of
|
||||
// cpi->rc.is_$Source_Type to make this function as it is in the comment?
|
||||
|
||||
// show_existing_frame is a flag left set from the end of encoding the
|
||||
// previous frame. Alongside it, is_src_frame_alt_ref may also be left
|
||||
// set so shouldn't be cleared in this case.
|
||||
if (!cpi->common.show_existing_frame) cpi->rc.is_src_frame_alt_ref = 0;
|
||||
|
||||
cpi->rc.is_bwd_ref_frame = 0;
|
||||
cpi->rc.is_last_bipred_frame = 0;
|
||||
cpi->rc.is_bipred_frame = 0;
|
||||
cpi->rc.is_src_frame_ext_arf = 0;
|
||||
|
||||
switch (type) {
|
||||
case KF_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 1;
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case LF_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
case GF_UPDATE:
|
||||
// TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
|
||||
// needed.
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
case OVERLAY_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_src_frame_alt_ref = 1;
|
||||
break;
|
||||
|
||||
case ARF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
// NOTE: BWDREF does not get updated along with ALTREF_FRAME.
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case BRF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_bwd_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case LAST_BIPRED_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_last_bipred_frame = 1;
|
||||
break;
|
||||
|
||||
case BIPRED_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_bipred_frame = 1;
|
||||
break;
|
||||
|
||||
case INTNL_OVERLAY_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_src_frame_alt_ref = 1;
|
||||
cpi->rc.is_src_frame_ext_arf = 1;
|
||||
break;
|
||||
|
||||
case INTNL_ARF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
if (cpi->new_bwdref_update_rule == 1 && cpi->oxcf.pass == 2) {
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
} else {
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 1;
|
||||
}
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
default: assert(0); break;
|
||||
}
|
||||
}
|
||||
|
||||
static void set_additional_frame_flags(const AV1_COMMON *const cm,
|
||||
unsigned int *const frame_flags) {
|
||||
if (frame_is_intra_only(cm)) *frame_flags |= FRAMEFLAGS_INTRAONLY;
|
||||
if (frame_is_sframe(cm)) *frame_flags |= FRAMEFLAGS_SWITCH;
|
||||
if (cm->error_resilient_mode) *frame_flags |= FRAMEFLAGS_ERROR_RESILIENT;
|
||||
}
|
||||
|
||||
// Advances frames_since_key and decrements frames_to_key for the frame that
// was just processed, when that frame counts towards the keyframe interval.
static INLINE void update_keyframe_counters(AV1_COMP *cpi) {
  // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
  //               differently here for rc->avg_frame_bandwidth.
  if (!cpi->common.show_frame && !cpi->rc.is_bwd_ref_frame) return;

  // If this is a show_existing_frame with a source other than altref,
  // or if it is not a displayed forward keyframe, the keyframe update
  // counters were incremented when it was originally encoded.
  const int already_counted =
      cpi->common.show_existing_frame && !cpi->rc.is_src_frame_alt_ref &&
      cpi->common.current_frame.frame_type != KEY_FRAME;
  if (already_counted) return;

  cpi->rc.frames_since_key++;
  cpi->rc.frames_to_key--;
}
|
||||
|
||||
static INLINE int is_frame_droppable(const AV1_COMP *const cpi) {
|
||||
return !(cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
|
||||
cpi->refresh_bwd_ref_frame || cpi->refresh_golden_frame ||
|
||||
cpi->refresh_last_frame);
|
||||
}
|
||||
|
||||
// Counts down frames_till_gf_update_due (never below zero) for shown frames
// and for frames that refresh no reference buffer.
static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
  // TODO(weitinglin): Updating this counter for is_frame_droppable
  // is a work-around to handle the condition when a frame is drop.
  // We should fix the cpi->common.show_frame flag
  // instead of checking the other condition to update the counter properly.
  if (!cpi->common.show_frame && !is_frame_droppable(cpi)) return;

  // Decrement count down till next gf
  if (cpi->rc.frames_till_gf_update_due > 0) {
    cpi->rc.frames_till_gf_update_due--;
  }
}
|
||||
|
||||
// Moves the two-pass GF group index on to the next frame, unless this frame
// was already accounted for when it was first encoded.
static INLINE void update_twopass_gf_group_index(AV1_COMP *cpi) {
  // Increment the gf group index ready for the next frame. If this is
  // a show_existing_frame with a source other than altref, or if it is not
  // a displayed forward keyframe, the index was incremented when it was
  // originally encoded.
  const int already_advanced =
      cpi->common.show_existing_frame && !cpi->rc.is_src_frame_alt_ref &&
      cpi->common.current_frame.frame_type != KEY_FRAME;
  if (already_advanced) return;

  ++cpi->twopass.gf_group.index;
}
|
||||
|
||||
// Runs the post-frame counter updates: keyframe counters, the golden-frame
// countdown and, in the second pass only, the GF group index.
static void update_rc_counts(AV1_COMP *cpi) {
  const int is_second_pass = cpi->oxcf.pass == 2;

  update_keyframe_counters(cpi);
  update_frames_till_gf_update(cpi);
  if (is_second_pass) {
    update_twopass_gf_group_index(cpi);
  }
}
|
||||
|
||||
// Decides whether the next frame should be emitted as a show_existing_frame
// and, if so, records which frame buffer to show. Always clears
// rc.is_src_frame_ext_arf before returning.
static void check_show_existing_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  const FRAME_UPDATE_TYPE next_type = gf_group->update_type[gf_group->index];

  // Under the new bwdref rule the ARF index collapses to 0 or 1.
  int which_arf;
  if (cpi->new_bwdref_update_rule == 1) {
    which_arf = gf_group->arf_update_idx[gf_group->index] > 0;
  } else {
    which_arf = gf_group->arf_update_idx[gf_group->index];
  }

  if (cm->show_existing_frame == 1) {
    // The previous frame was a show-existing frame; drop the flag.
    cm->show_existing_frame = 0;
  } else if (cpi->rc.is_last_bipred_frame) {
    // NOTE: When new structure is used, every bwdref will have one overlay
    //       frame. Therefore, there is no need to find out which frame to
    //       show in advance.
    if (cpi->new_bwdref_update_rule == 0) {
      // NOTE: If the current frame is a last bi-predictive frame, it is
      //       needed next to show the BWDREF_FRAME, which is pointed by
      //       the last_fb_idxes[0] after reference frame buffer update
      cpi->rc.is_last_bipred_frame = 0;
      cm->show_existing_frame = 1;
      cpi->existing_fb_idx_to_show = cm->remapped_ref_idx[0];
    }
  } else {
    const int next_is_overlay = next_type == OVERLAY_UPDATE;
    const int next_is_intnl_overlay = next_type == INTNL_OVERLAY_UPDATE;

    if (cpi->is_arf_filter_off[which_arf] &&
        (next_is_overlay || next_is_intnl_overlay)) {
      int ref_to_show;
      if (next_is_overlay) {
        ref_to_show = ALTREF_FRAME;
      } else if (cpi->new_bwdref_update_rule == 1) {
        ref_to_show = BWDREF_FRAME;
      } else {
        ref_to_show = ALTREF2_FRAME;
      }

      // Other parameters related to OVERLAY_UPDATE will be taken care of
      // in av1_rc_get_second_pass_params(cpi)
      cm->show_existing_frame = 1;
      cpi->rc.is_src_frame_alt_ref = 1;
      cpi->existing_fb_idx_to_show = get_ref_frame_map_idx(cm, ref_to_show);
      if (cpi->new_bwdref_update_rule == 0) {
        cpi->is_arf_filter_off[which_arf] = 0;
      }
    }
  }

  cpi->rc.is_src_frame_ext_arf = 0;
}
|
||||
|
||||
static void set_ext_overrides(AV1_COMP *const cpi,
|
||||
EncodeFrameParams *const frame_params) {
|
||||
@@ -28,9 +260,8 @@ static void set_ext_overrides(AV1_COMP *const cpi,
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
|
||||
if (cpi->ext_use_s_frame) {
|
||||
cm->current_frame.frame_type = S_FRAME;
|
||||
frame_params->frame_type = S_FRAME;
|
||||
}
|
||||
cm->force_primary_ref_none = cpi->ext_use_primary_ref_none;
|
||||
|
||||
if (cpi->ext_refresh_frame_context_pending) {
|
||||
cm->refresh_frame_context = cpi->ext_refresh_frame_context;
|
||||
@@ -50,10 +281,9 @@ static void set_ext_overrides(AV1_COMP *const cpi,
|
||||
// A keyframe is already error resilient and keyframes with
|
||||
// error_resilient_mode interferes with the use of show_existing_frame
|
||||
// when forward reference keyframes are enabled.
|
||||
frame_params->error_resilient_mode &=
|
||||
cm->current_frame.frame_type != KEY_FRAME;
|
||||
frame_params->error_resilient_mode &= frame_params->frame_type != KEY_FRAME;
|
||||
// For bitstream conformance, s-frames must be error-resilient
|
||||
frame_params->error_resilient_mode |= frame_is_sframe(cm);
|
||||
frame_params->error_resilient_mode |= frame_params->frame_type == S_FRAME;
|
||||
}
|
||||
|
||||
static int get_ref_frame_flags(const AV1_COMP *const cpi) {
|
||||
@@ -130,10 +360,474 @@ static int get_ref_frame_flags(const AV1_COMP *const cpi) {
|
||||
return flags;
|
||||
}
|
||||
|
||||
static int get_current_frame_ref_type(
|
||||
const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params) {
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
// We choose the reference "type" of this frame from the flags which indicate
|
||||
// which reference frames will be refreshed by it. More than one of these
|
||||
// flags may be set, so the order here implies an order of precedence.
|
||||
// This is just used to choose the primary_ref_frame (as the most recent
|
||||
// reference buffer of the same reference-type as the current frame)
|
||||
|
||||
const int intra_only = frame_params->frame_type == KEY_FRAME ||
|
||||
frame_params->frame_type == INTRA_ONLY_FRAME;
|
||||
if (intra_only || frame_params->error_resilient_mode ||
|
||||
cpi->ext_use_primary_ref_none)
|
||||
return REGULAR_FRAME;
|
||||
else if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE)
|
||||
return EXT_ARF_FRAME;
|
||||
else if (cpi->refresh_alt_ref_frame)
|
||||
return ARF_FRAME;
|
||||
else if (cpi->rc.is_src_frame_alt_ref)
|
||||
return OVERLAY_FRAME;
|
||||
else if (cpi->refresh_golden_frame)
|
||||
return GLD_FRAME;
|
||||
else if (cpi->refresh_bwd_ref_frame)
|
||||
return BRF_FRAME;
|
||||
else
|
||||
return REGULAR_FRAME;
|
||||
}
|
||||
|
||||
static int choose_primary_ref_frame(
|
||||
const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params) {
|
||||
const AV1_COMMON *const cm = &cpi->common;
|
||||
|
||||
const int intra_only = frame_params->frame_type == KEY_FRAME ||
|
||||
frame_params->frame_type == INTRA_ONLY_FRAME;
|
||||
if (intra_only || frame_params->error_resilient_mode ||
|
||||
cpi->ext_use_primary_ref_none) {
|
||||
return PRIMARY_REF_NONE;
|
||||
}
|
||||
|
||||
// Find the most recent reference frame with the same reference type as the
|
||||
// current frame
|
||||
const FRAME_CONTEXT_INDEX current_ref_type =
|
||||
get_current_frame_ref_type(cpi, frame_params);
|
||||
int wanted_fb = cpi->fb_of_context_type[current_ref_type];
|
||||
|
||||
int primary_ref_frame = PRIMARY_REF_NONE;
|
||||
for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
|
||||
if (get_ref_frame_map_idx(cm, ref_frame) == wanted_fb) {
|
||||
primary_ref_frame = ref_frame - LAST_FRAME;
|
||||
}
|
||||
}
|
||||
return primary_ref_frame;
|
||||
}
|
||||
|
||||
static void update_fb_of_context_type(
|
||||
const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params,
|
||||
int *const fb_of_context_type) {
|
||||
const AV1_COMMON *const cm = &cpi->common;
|
||||
|
||||
if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
|
||||
cpi->ext_use_primary_ref_none) {
|
||||
for (int i = 0; i < REF_FRAMES; i++) {
|
||||
fb_of_context_type[i] = -1;
|
||||
}
|
||||
fb_of_context_type[REGULAR_FRAME] =
|
||||
cm->show_frame ? get_ref_frame_map_idx(cm, GOLDEN_FRAME)
|
||||
: get_ref_frame_map_idx(cm, ALTREF_FRAME);
|
||||
}
|
||||
|
||||
if (!encode_show_existing_frame(cm)) {
|
||||
// Refresh fb_of_context_type[]: see encoder.h for explanation
|
||||
// Note that we want the value of refresh_frame_flags for the frame that
|
||||
// just happened. If we call get_refresh_frame_flags now we will get a
|
||||
// different answer, because update_reference_frames() has happened.
|
||||
if (cm->current_frame.frame_type == KEY_FRAME) {
|
||||
// All ref frames are refreshed, pick one that will live long enough
|
||||
fb_of_context_type[REGULAR_FRAME] = 0;
|
||||
} else {
|
||||
// If more than one frame is refreshed, it doesn't matter which one we
|
||||
// pick so pick the first. LST sometimes doesn't refresh any: this is ok
|
||||
const int current_frame_ref_type =
|
||||
get_current_frame_ref_type(cpi, frame_params);
|
||||
for (int i = 0; i < REF_FRAMES; i++) {
|
||||
if (cm->current_frame.refresh_frame_flags & (1 << i)) {
|
||||
fb_of_context_type[current_frame_ref_type] = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int get_order_offset(const AV1_COMP *const cpi,
|
||||
const EncodeFrameParams *const frame_params) {
|
||||
// shown frame by definition has order offset 0
|
||||
// show_existing_frame ignores order_offset and simply takes the order_hint
|
||||
// from the reference frame being shown.
|
||||
if (frame_params->show_frame || cpi->common.show_existing_frame) return 0;
|
||||
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
const int arf_offset =
|
||||
AOMMIN((MAX_GF_INTERVAL - 1), gf_group->arf_src_offset[gf_group->index]);
|
||||
const int brf_offset = gf_group->brf_src_offset[gf_group->index];
|
||||
return AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
|
||||
}
|
||||
|
||||
// Re-estimates the encoder frame rate from the timestamps of `source` and
// records those timestamps as the last ones seen. A large change in frame
// duration (or the very first frame) sets the rate directly from this frame's
// duration; otherwise the duration is averaged into the running estimate.
static void adjust_frame_rate(AV1_COMP *cpi,
                              const struct lookahead_entry *source) {
  int64_t this_duration;
  int step = 0;

  // Clear down mmx registers
  aom_clear_system_state();

  if (source->ts_start != cpi->first_time_stamp_ever) {
    const int64_t prev_duration =
        cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;

    this_duration = source->ts_end - cpi->last_end_time_stamp_seen;

    // do a step update if the duration changes by 10%
    if (prev_duration != 0) {
      step = (int)((this_duration - prev_duration) * 10 / prev_duration);
    }
  } else {
    // First frame ever seen: use its own duration and force a step update.
    this_duration = source->ts_end - source->ts_start;
    step = 1;
  }

  if (this_duration != 0) {
    if (step != 0) {
      av1_new_framerate(cpi, 10000000.0 / this_duration);
    } else {
      // Average this frame's rate into the last second's average
      // frame rate. If we haven't seen 1 second yet, then average
      // over the whole interval seen.
      const double interval = AOMMIN(
          (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
      double avg_duration = 10000000.0 / cpi->framerate;
      avg_duration *= (interval - avg_duration + this_duration);
      avg_duration /= interval;

      av1_new_framerate(cpi, 10000000.0 / avg_duration);
    }
  }

  cpi->last_time_stamp_seen = source->ts_start;
  cpi->last_end_time_stamp_seen = source->ts_end;
}
|
||||
|
||||
// Determines whether `source` is to be coded as an ARF overlay and, if it is,
// drops the pending alt-ref source and decides whether LAST is refreshed.
static void check_src_altref(AV1_COMP *cpi,
                             const struct lookahead_entry *source) {
  RATE_CONTROL *const rc = &cpi->rc;

  // If pass == 2, the parameters set here will be reset in
  // av1_rc_get_second_pass_params()
  if (cpi->oxcf.pass == 2) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const int is_intnl_overlay =
        gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
    const int is_overlay =
        gf_group->update_type[gf_group->index] == OVERLAY_UPDATE;

    rc->is_src_frame_alt_ref = is_intnl_overlay || is_overlay;
    rc->is_src_frame_ext_arf = is_intnl_overlay;
  } else {
    rc->is_src_frame_alt_ref =
        cpi->alt_ref_source && (source == cpi->alt_ref_source);
  }

  if (!rc->is_src_frame_alt_ref) return;

  // Current frame is an ARF overlay frame.
  cpi->alt_ref_source = NULL;

  if (rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame) {
    // For INTNL_OVERLAY, when show_existing_frame == 0, they do need to
    // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3,
    // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST.
    cpi->refresh_last_frame = 1;
  } else {
    // Don't refresh the last buffer for an ARF overlay frame. It will
    // become the GF so preserve last as an alternative prediction option.
    cpi->refresh_last_frame = 0;
  }
}
|
||||
|
||||
// Returns 0 if this is not an alt ref else the offset of the source frame
|
||||
// used as the arf midpoint.
|
||||
static int get_arf_src_index(AV1_COMP *cpi) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
int arf_src_index = 0;
|
||||
if (is_altref_enabled(cpi)) {
|
||||
if (cpi->oxcf.pass == 2) {
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
|
||||
arf_src_index = gf_group->arf_src_offset[gf_group->index];
|
||||
}
|
||||
} else if (rc->source_alt_ref_pending) {
|
||||
arf_src_index = rc->frames_till_gf_update_due;
|
||||
}
|
||||
}
|
||||
return arf_src_index;
|
||||
}
|
||||
|
||||
// Returns the lookahead offset of the source for a backward reference frame,
// or 0 when the current GF group entry does not call for one.
static int get_brf_src_index(AV1_COMP *cpi) {
  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;

  // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup
  //               flag.
  if (!gf_group->bidir_pred_enabled[gf_group->index]) return 0;

  if (cpi->oxcf.pass != 2) {
    // TODO(zoeliu): To re-visit the setup for this scenario
    return cpi->rc.bipred_group_interval - 1;
  }

  if (gf_group->update_type[gf_group->index] == BRF_UPDATE) {
    return gf_group->brf_src_offset[gf_group->index];
  }
  return 0;
}
|
||||
|
||||
// Returns 0 if this is not an alt ref else the offset of the source frame
|
||||
// used as the arf midpoint.
|
||||
static int get_arf2_src_index(AV1_COMP *cpi) {
|
||||
int arf2_src_index = 0;
|
||||
if (is_altref_enabled(cpi) && cpi->num_extra_arfs) {
|
||||
if (cpi->oxcf.pass == 2) {
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
|
||||
arf2_src_index = gf_group->arf_src_offset[gf_group->index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return arf2_src_index;
|
||||
}
|
||||
|
||||
// Called if this frame is an ARF or ARF2. Also handles forward-keyframes
|
||||
// For an ARF set arf2=0, for ARF2 set arf2=1
|
||||
// temporal_filtered is set to 1 if we temporally filter the ARF frame, so that
|
||||
// the correct post-filter buffer can be used.
|
||||
static struct lookahead_entry *setup_arf_or_arf2(
|
||||
AV1_COMP *const cpi, const int arf_src_index, const int arf2,
|
||||
int *temporal_filtered, EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
|
||||
|
||||
assert(arf_src_index <= rc->frames_to_key);
|
||||
*temporal_filtered = 0;
|
||||
|
||||
struct lookahead_entry *source =
|
||||
av1_lookahead_peek(cpi->lookahead, arf_src_index);
|
||||
|
||||
if (source != NULL) {
|
||||
cm->showable_frame = 1;
|
||||
cpi->alt_ref_source = source;
|
||||
|
||||
// When arf_src_index == rc->frames_to_key, it indicates a fwd_kf
|
||||
if (!arf2 && arf_src_index == rc->frames_to_key) {
|
||||
// Skip temporal filtering and mark as intra_only if we have a fwd_kf
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
int which_arf = gf_group->arf_update_idx[gf_group->index];
|
||||
cpi->is_arf_filter_off[which_arf] = 1;
|
||||
cpi->no_show_kf = 1;
|
||||
} else {
|
||||
if (oxcf->arnr_max_frames > 0) {
|
||||
// Produce the filtered ARF frame.
|
||||
av1_temporal_filter(cpi, arf_src_index);
|
||||
aom_extend_frame_borders(&cpi->alt_ref_buffer, av1_num_planes(cm));
|
||||
*temporal_filtered = 1;
|
||||
}
|
||||
}
|
||||
frame_params->show_frame = 0;
|
||||
|
||||
if (oxcf->pass < 2) {
|
||||
// In second pass, the buffer updates configure will be set
|
||||
// in the function av1_rc_get_second_pass_params
|
||||
av1_configure_buffer_updates(cpi, arf2 ? INTNL_ARF_UPDATE : ARF_UPDATE);
|
||||
}
|
||||
}
|
||||
rc->source_alt_ref_pending = 0;
|
||||
return source;
|
||||
}
|
||||
|
||||
// Determine whether there is a forced keyframe pending in the lookahead buffer
|
||||
static int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
|
||||
const int up_to_index) {
|
||||
for (int i = 0; i <= up_to_index; i++) {
|
||||
const struct lookahead_entry *e = av1_lookahead_peek(lookahead, i);
|
||||
if (e == NULL) {
|
||||
// We have reached the end of the lookahead buffer and not early-returned
|
||||
// so there isn't a forced key-frame pending.
|
||||
return 0;
|
||||
} else if (e->flags == AOM_EFLAG_FORCE_KF) {
|
||||
return 1;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 0; // Never reached
|
||||
}
|
||||
|
||||
// Check if we should encode an ARF, ARF2 or BRF. If not, try a LAST
|
||||
// Do some setup associated with the chosen source
|
||||
// Return the frame source, or NULL if we couldn't find one
|
||||
struct lookahead_entry *choose_frame_source(
|
||||
AV1_COMP *const cpi, int *const temporal_filtered, int *const flush,
|
||||
struct lookahead_entry **last_source,
|
||||
EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
struct lookahead_entry *source = NULL;
|
||||
*temporal_filtered = 0;
|
||||
|
||||
// Should we encode an alt-ref frame.
|
||||
int arf_src_index = get_arf_src_index(cpi);
|
||||
if (arf_src_index &&
|
||||
is_forced_keyframe_pending(cpi->lookahead, arf_src_index)) {
|
||||
arf_src_index = 0;
|
||||
*flush = 1;
|
||||
}
|
||||
|
||||
if (arf_src_index) {
|
||||
source = setup_arf_or_arf2(cpi, arf_src_index, 0, temporal_filtered,
|
||||
frame_params);
|
||||
}
|
||||
|
||||
// Should we encode an arf2 frame (mutually exclusive to ARF)
|
||||
arf_src_index = get_arf2_src_index(cpi);
|
||||
if (arf_src_index &&
|
||||
is_forced_keyframe_pending(cpi->lookahead, arf_src_index)) {
|
||||
arf_src_index = 0;
|
||||
*flush = 1;
|
||||
}
|
||||
|
||||
if (arf_src_index) {
|
||||
source = setup_arf_or_arf2(cpi, arf_src_index, 1, temporal_filtered,
|
||||
frame_params);
|
||||
}
|
||||
|
||||
cpi->rc.is_bwd_ref_frame = 0;
|
||||
int brf_src_index = get_brf_src_index(cpi);
|
||||
if (brf_src_index) {
|
||||
assert(brf_src_index <= cpi->rc.frames_to_key);
|
||||
if ((source = av1_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) {
|
||||
cm->showable_frame = 1;
|
||||
frame_params->show_frame = 0;
|
||||
|
||||
if (cpi->oxcf.pass < 2) {
|
||||
// In second pass, the buffer updates configure will be set
|
||||
// in the function av1_rc_get_second_pass_params
|
||||
av1_configure_buffer_updates(cpi, BRF_UPDATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!source) {
|
||||
// Get last frame source.
|
||||
if (cm->current_frame.frame_number > 0) {
|
||||
*last_source = av1_lookahead_peek(cpi->lookahead, -1);
|
||||
}
|
||||
// Read in the source frame.
|
||||
source = av1_lookahead_pop(cpi->lookahead, *flush);
|
||||
|
||||
if (source != NULL) {
|
||||
frame_params->show_frame = 1;
|
||||
|
||||
// Check to see if the frame should be encoded as an arf overlay.
|
||||
check_src_altref(cpi, source);
|
||||
}
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
|
||||
uint8_t *const dest, unsigned int *frame_flags) {
|
||||
EncodeFrameParams frame_params = { 0, 0, 0 };
|
||||
EncodeFrameResults frame_results = { 0 };
|
||||
uint8_t *const dest, unsigned int *frame_flags,
|
||||
int64_t *const time_stamp, int64_t *const time_end,
|
||||
const aom_rational_t *const timebase, int flush) {
|
||||
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
|
||||
EncodeFrameInput frame_input;
|
||||
EncodeFrameParams frame_params;
|
||||
EncodeFrameResults frame_results;
|
||||
memset(&frame_input, 0, sizeof(frame_input));
|
||||
memset(&frame_params, 0, sizeof(frame_params));
|
||||
memset(&frame_results, 0, sizeof(frame_results));
|
||||
|
||||
int temporal_filtered = 0;
|
||||
struct lookahead_entry *source = NULL;
|
||||
struct lookahead_entry *last_source = NULL;
|
||||
if (cm->show_existing_frame) {
|
||||
source = av1_lookahead_pop(cpi->lookahead, flush);
|
||||
} else {
|
||||
source = choose_frame_source(cpi, &temporal_filtered, &flush, &last_source,
|
||||
&frame_params);
|
||||
}
|
||||
|
||||
if (source == NULL) { // If no source was found, we can't encode a frame.
|
||||
if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
|
||||
av1_end_first_pass(cpi); /* get last stats packet */
|
||||
cpi->twopass.first_pass_done = 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
frame_input.source = temporal_filtered ? &cpi->alt_ref_buffer : &source->img;
|
||||
frame_input.last_source = last_source != NULL ? &last_source->img : NULL;
|
||||
frame_input.ts_duration = source->ts_end - source->ts_start;
|
||||
|
||||
*time_stamp = source->ts_start;
|
||||
*time_end = source->ts_end;
|
||||
if (source->ts_start < cpi->first_time_stamp_ever) {
|
||||
cpi->first_time_stamp_ever = source->ts_start;
|
||||
cpi->last_end_time_stamp_seen = source->ts_start;
|
||||
}
|
||||
|
||||
av1_apply_encoding_flags(cpi, source->flags);
|
||||
if (!cm->show_existing_frame)
|
||||
*frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
|
||||
cpi->frame_flags = *frame_flags;
|
||||
|
||||
if (frame_params.show_frame ||
|
||||
(cm->show_existing_frame && cpi->rc.is_src_frame_alt_ref)) {
|
||||
// Shown frames and arf-overlay frames need frame-rate considering
|
||||
adjust_frame_rate(cpi, source);
|
||||
}
|
||||
|
||||
if (cm->show_existing_frame) {
|
||||
// show_existing_frame implies this frame is shown!
|
||||
frame_params.show_frame = 1;
|
||||
} else {
|
||||
// Retain the RF_LEVEL for the current newly coded frame.
|
||||
cm->cur_frame->frame_rf_level =
|
||||
cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
|
||||
|
||||
if (cpi->film_grain_table) {
|
||||
cm->seq_params.film_grain_params_present = aom_film_grain_table_lookup(
|
||||
cpi->film_grain_table, *time_stamp, *time_end, 0 /* =erase */,
|
||||
&cm->film_grain_params);
|
||||
}
|
||||
cm->cur_frame->film_grain_params_present =
|
||||
cm->seq_params.film_grain_params_present;
|
||||
|
||||
// only one operating point supported now
|
||||
const int64_t pts64 = ticks_to_timebase_units(timebase, *time_stamp);
|
||||
if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR;
|
||||
cpi->common.frame_presentation_time = (uint32_t)pts64;
|
||||
}
|
||||
|
||||
if (oxcf->pass == 2 &&
|
||||
(!cm->show_existing_frame || cpi->rc.is_src_frame_alt_ref)) {
|
||||
// GF_GROUP needs updating for arf overlays as well as non-show-existing
|
||||
av1_rc_get_second_pass_params(cpi, &frame_params);
|
||||
}
|
||||
if (cm->show_existing_frame && frame_params.frame_type != KEY_FRAME) {
|
||||
// Force show-existing frames to be INTER, except forward keyframes
|
||||
frame_params.frame_type = INTER_FRAME;
|
||||
}
|
||||
|
||||
if (!cm->show_existing_frame) {
|
||||
cm->using_qmatrix = cpi->oxcf.using_qm;
|
||||
cm->min_qmlevel = cpi->oxcf.qm_minlevel;
|
||||
cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
|
||||
if (cpi->twopass.gf_group.index == 1 && cpi->oxcf.enable_tpl_model) {
|
||||
av1_set_frame_size(cpi, cm->width, cm->height);
|
||||
av1_tpl_setup_stats(cpi, &frame_input);
|
||||
}
|
||||
}
|
||||
|
||||
frame_params.frame_flags = frame_flags;
|
||||
|
||||
@@ -143,17 +837,82 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
|
||||
// TODO(david.turner@argondesign.com): Change all the encode strategy to
|
||||
// modify frame_params instead of cm or cpi.
|
||||
|
||||
// Apply external override flags
|
||||
set_ext_overrides(cpi, &frame_params);
|
||||
// Per-frame encode speed. In theory this can vary, but things may have been
|
||||
// written assuming speed-level will not change within a sequence, so this
|
||||
// parameter should be used with caution.
|
||||
frame_params.speed = oxcf->speed;
|
||||
|
||||
// Work out which reference frame slots may be used.
|
||||
frame_params.ref_frame_flags = get_ref_frame_flags(cpi);
|
||||
// Work out some encoding parameters specific to the pass:
|
||||
if (oxcf->pass == 0) {
|
||||
if (cpi->oxcf.rc_mode == AOM_CBR) {
|
||||
av1_rc_get_one_pass_cbr_params(cpi, &frame_params);
|
||||
} else {
|
||||
av1_rc_get_one_pass_vbr_params(cpi, &frame_params);
|
||||
}
|
||||
} else if (oxcf->pass == 1) {
|
||||
av1_setup_frame_size(cpi);
|
||||
cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(&cpi->oxcf);
|
||||
if (!cpi->refresh_alt_ref_frame && (cm->current_frame.frame_number == 0 ||
|
||||
(cpi->frame_flags & FRAMEFLAGS_KEY))) {
|
||||
frame_params.frame_type = KEY_FRAME;
|
||||
} else {
|
||||
frame_params.frame_type = INTER_FRAME;
|
||||
}
|
||||
} else if (oxcf->pass == 2) {
|
||||
#if CONFIG_MISMATCH_DEBUG
|
||||
mismatch_move_frame_idx_w();
|
||||
#endif
|
||||
#if TXCOEFF_COST_TIMER
|
||||
cm->txcoeff_cost_timer = 0;
|
||||
cm->txcoeff_cost_count = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (av1_encode(cpi, dest, &frame_params, &frame_results) != AOM_CODEC_OK) {
|
||||
if (oxcf->pass == 0 || oxcf->pass == 2) {
|
||||
// Apply external override flags
|
||||
set_ext_overrides(cpi, &frame_params);
|
||||
|
||||
// Work out which reference frame slots may be used.
|
||||
frame_params.ref_frame_flags = get_ref_frame_flags(cpi);
|
||||
}
|
||||
|
||||
if (oxcf->pass == 0 || oxcf->pass == 2) {
|
||||
frame_params.primary_ref_frame =
|
||||
choose_primary_ref_frame(cpi, &frame_params);
|
||||
frame_params.order_offset = get_order_offset(cpi, &frame_params);
|
||||
}
|
||||
|
||||
if (av1_encode(cpi, dest, &frame_input, &frame_params, &frame_results) !=
|
||||
AOM_CODEC_OK) {
|
||||
return AOM_CODEC_ERROR;
|
||||
}
|
||||
|
||||
if (oxcf->pass == 2) {
|
||||
#if TXCOEFF_COST_TIMER
|
||||
cm->cum_txcoeff_cost_timer += cm->txcoeff_cost_timer;
|
||||
fprintf(stderr,
|
||||
"\ntxb coeff cost block number: %ld, frame time: %ld, cum time %ld "
|
||||
"in us\n",
|
||||
cm->txcoeff_cost_count, cm->txcoeff_cost_timer,
|
||||
cm->cum_txcoeff_cost_timer);
|
||||
#endif
|
||||
av1_twopass_postencode_update(cpi);
|
||||
}
|
||||
|
||||
if (oxcf->pass == 0 || oxcf->pass == 2) {
|
||||
update_fb_of_context_type(cpi, &frame_params, cpi->fb_of_context_type);
|
||||
set_additional_frame_flags(cm, frame_params.frame_flags);
|
||||
update_rc_counts(cpi);
|
||||
check_show_existing_frame(cpi); // Is next frame a show_existing frame?
|
||||
}
|
||||
|
||||
// Unpack frame_results:
|
||||
*size = frame_results.size;
|
||||
|
||||
// Signal to a higher-level caller whether this frame is droppable
|
||||
if (*size > 0) {
|
||||
cpi->droppable = is_frame_droppable(cpi);
|
||||
}
|
||||
|
||||
return AOM_CODEC_OK;
|
||||
}
|
||||
|
||||
+14
-1
@@ -16,11 +16,24 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "aom/aom_encoder.h"
|
||||
|
||||
#include "av1/encoder/encoder.h"
|
||||
#include "av1/encoder/firstpass.h"
|
||||
|
||||
// This function will implement high-level encode strategy, choosing frame type,
|
||||
// frame placement, etc. It populates an EncodeFrameParams struct with the
|
||||
// results of these decisions and then calls av1_encode()
|
||||
int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
|
||||
uint8_t *const dest, unsigned int *frame_flags);
|
||||
uint8_t *const dest, unsigned int *frame_flags,
|
||||
int64_t *const time_stamp, int64_t *const time_end,
|
||||
const aom_rational_t *const timebase, int flush);
|
||||
|
||||
// Set individual buffer update flags based on frame reference type
|
||||
void av1_configure_buffer_updates(AV1_COMP *const cpi,
|
||||
const FRAME_UPDATE_TYPE type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+118
-109
@@ -600,7 +600,7 @@ static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
|
||||
return;
|
||||
}
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
x->source_variance = av1_high_get_sby_perpixel_variance(
|
||||
cpi, &x->plane[0].src, bsize, xd->bd);
|
||||
} else {
|
||||
@@ -613,8 +613,7 @@ static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
|
||||
x->edge_strength = UINT16_MAX;
|
||||
} else {
|
||||
x->edge_strength =
|
||||
edge_strength(&x->plane[0].src, bsize,
|
||||
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd);
|
||||
edge_strength(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
|
||||
}
|
||||
// Save rdmult before it might be changed, so it can be restored later.
|
||||
orig_rdmult = x->rdmult;
|
||||
@@ -2180,7 +2179,8 @@ static void simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
|
||||
|
||||
// Get a copy of the prediction output
|
||||
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
|
||||
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
|
||||
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
|
||||
AOM_PLANE_Y, AOM_PLANE_Y);
|
||||
|
||||
aom_clear_system_state();
|
||||
|
||||
@@ -2787,77 +2787,6 @@ static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
|
||||
}
|
||||
}
|
||||
|
||||
#define FEATURE_SIZE 19
|
||||
static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
|
||||
2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
|
||||
0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f,
|
||||
0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f,
|
||||
0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
|
||||
2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f,
|
||||
-1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f,
|
||||
-0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f,
|
||||
0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
|
||||
2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f,
|
||||
-0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f,
|
||||
-1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f,
|
||||
0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
|
||||
1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f,
|
||||
-0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f,
|
||||
-1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f,
|
||||
-0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
|
||||
2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f,
|
||||
-0.658074f, -1.960362f, 0.000000f, -0.403080f, 0.282699f,
|
||||
-2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f,
|
||||
0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
|
||||
-1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f,
|
||||
-0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f,
|
||||
3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f,
|
||||
-0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
|
||||
-1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f,
|
||||
-4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f,
|
||||
1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f,
|
||||
-0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
|
||||
-2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f,
|
||||
-2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f,
|
||||
0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f,
|
||||
-0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
|
||||
-1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f,
|
||||
-3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f,
|
||||
0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f,
|
||||
-0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
|
||||
-1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f,
|
||||
-1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f,
|
||||
0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f,
|
||||
0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
|
||||
};
|
||||
|
||||
// split_score indicates confidence of picking split partition;
|
||||
// none_score indicates confidence of picking none partition;
|
||||
static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
|
||||
@@ -2980,7 +2909,7 @@ static void ml_prune_rect_partition(const AV1_COMP *const cpi,
|
||||
// Variance ratios
|
||||
const MACROBLOCKD *const xd = &x->e_mbd;
|
||||
int whole_block_variance;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
whole_block_variance = av1_high_get_sby_perpixel_variance(
|
||||
cpi, &x->plane[0].src, bsize, xd->bd);
|
||||
} else {
|
||||
@@ -2998,7 +2927,7 @@ static void ml_prune_rect_partition(const AV1_COMP *const cpi,
|
||||
const int x_idx = (i & 1) * bw / 2;
|
||||
const int y_idx = (i >> 1) * bw / 2;
|
||||
buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
split_variance[i] =
|
||||
av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
|
||||
} else {
|
||||
@@ -3180,7 +3109,7 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
|
||||
src + i * block_size_high[horz_4_bs] * src_stride;
|
||||
const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
|
||||
unsigned int horz_var, vert_var, sse;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
switch (xd->bd) {
|
||||
case 10:
|
||||
horz_var = cpi->fn_ptr[horz_4_bs].vf(
|
||||
@@ -3898,6 +3827,13 @@ static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
|
||||
|
||||
(void)*tp_orig;
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
PartitionStats *part_stats = &cpi->partition_stats;
|
||||
const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
|
||||
int *partition_decisions = part_stats->partition_decisions[bsize_idx];
|
||||
int *partition_attempts = part_stats->partition_attempts[bsize_idx];
|
||||
#endif
|
||||
|
||||
// Override partition costs at the edges of the frame in the same
|
||||
// way as in read_partition (see decodeframe.c)
|
||||
if (!(has_rows && has_cols)) {
|
||||
@@ -4154,6 +4090,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
const int64_t best_remain_rdcost =
|
||||
(best_rdc.rdcost == INT64_MAX) ? INT64_MAX
|
||||
: (best_rdc.rdcost - partition_rd_cost);
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
|
||||
partition_attempts[PARTITION_NONE] += 1;
|
||||
}
|
||||
#endif
|
||||
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
|
||||
PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
|
||||
pb_source_variance = x->source_variance;
|
||||
@@ -4291,6 +4232,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
|
||||
|
||||
int idx;
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_SPLIT] += 1;
|
||||
}
|
||||
#endif
|
||||
for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
|
||||
const int x_idx = (idx & 1) * mi_step;
|
||||
const int y_idx = (idx >> 1) * mi_step;
|
||||
@@ -4469,11 +4415,16 @@ BEGIN_PARTITION_SEARCH:
|
||||
pc_tree->horizontal[0].pred_interp_filter =
|
||||
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
|
||||
}
|
||||
sum_rdc.rate = partition_cost[PARTITION_HORZ];
|
||||
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
|
||||
const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
|
||||
? INT64_MAX
|
||||
: (best_rdc.rdcost - sum_rdc.rdcost);
|
||||
sum_rdc.rate = partition_cost[PARTITION_HORZ];
|
||||
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
|
||||
partition_attempts[PARTITION_HORZ] += 1;
|
||||
}
|
||||
#endif
|
||||
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
|
||||
PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
|
||||
best_remain_rdcost);
|
||||
@@ -4551,6 +4502,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
|
||||
? INT64_MAX
|
||||
: (best_rdc.rdcost - sum_rdc.rdcost);
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
|
||||
partition_attempts[PARTITION_VERT] += 1;
|
||||
}
|
||||
#endif
|
||||
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
|
||||
PARTITION_VERT, subsize, &pc_tree->vertical[0],
|
||||
best_remain_rdcost);
|
||||
@@ -4609,7 +4565,7 @@ BEGIN_PARTITION_SEARCH:
|
||||
|
||||
if (pb_source_variance == UINT_MAX) {
|
||||
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
pb_source_variance = av1_high_get_sby_perpixel_variance(
|
||||
cpi, &x->plane[0].src, bsize, xd->bd);
|
||||
} else {
|
||||
@@ -4770,6 +4726,18 @@ BEGIN_PARTITION_SEARCH:
|
||||
pc_tree->horizontala[2].ref_selected[0] = split_mbmi[2]->ref_frame[0];
|
||||
}
|
||||
}
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
{
|
||||
RD_STATS tmp_sum_rdc;
|
||||
av1_init_rd_stats(&tmp_sum_rdc);
|
||||
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
|
||||
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
|
||||
if (!frame_is_intra_only(cm) &&
|
||||
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_HORZ_A] += 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
|
||||
pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
|
||||
PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
|
||||
@@ -4829,6 +4797,18 @@ BEGIN_PARTITION_SEARCH:
|
||||
pc_tree->horizontalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
|
||||
}
|
||||
}
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
{
|
||||
RD_STATS tmp_sum_rdc;
|
||||
av1_init_rd_stats(&tmp_sum_rdc);
|
||||
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
|
||||
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
|
||||
if (!frame_is_intra_only(cm) &&
|
||||
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_HORZ_B] += 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
|
||||
pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
|
||||
PARTITION_HORZ_B, mi_row, mi_col, subsize,
|
||||
@@ -4886,6 +4866,18 @@ BEGIN_PARTITION_SEARCH:
|
||||
pc_tree->verticala[2].ref_selected[0] = split_mbmi[1]->ref_frame[0];
|
||||
}
|
||||
}
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
{
|
||||
RD_STATS tmp_sum_rdc;
|
||||
av1_init_rd_stats(&tmp_sum_rdc);
|
||||
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
|
||||
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
|
||||
if (!frame_is_intra_only(cm) &&
|
||||
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_VERT_A] += 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
|
||||
pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
|
||||
PARTITION_VERT_A, mi_row, mi_col, bsize2,
|
||||
@@ -4942,6 +4934,18 @@ BEGIN_PARTITION_SEARCH:
|
||||
pc_tree->verticalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
|
||||
}
|
||||
}
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
{
|
||||
RD_STATS tmp_sum_rdc;
|
||||
av1_init_rd_stats(&tmp_sum_rdc);
|
||||
tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
|
||||
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
|
||||
if (!frame_is_intra_only(cm) &&
|
||||
best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_VERT_B] += 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
|
||||
pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
|
||||
PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
|
||||
@@ -5000,6 +5004,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
|
||||
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_HORZ_4] += 1;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
const int this_mi_row = mi_row + i * quarter_step;
|
||||
|
||||
@@ -5046,6 +5055,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
sum_rdc.rate = partition_cost[PARTITION_VERT_4];
|
||||
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
|
||||
partition_attempts[PARTITION_VERT_4] += 1;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
const int this_mi_col = mi_col + i * quarter_step;
|
||||
|
||||
@@ -5083,6 +5097,11 @@ BEGIN_PARTITION_SEARCH:
|
||||
// Did not find a valid partition, go back and search again, with less
|
||||
// constraint on which partition types to search.
|
||||
x->must_find_valid_partition = 1;
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm)) {
|
||||
part_stats->partition_redo += 1;
|
||||
}
|
||||
#endif
|
||||
goto BEGIN_PARTITION_SEARCH;
|
||||
}
|
||||
|
||||
@@ -5093,6 +5112,13 @@ BEGIN_PARTITION_SEARCH:
|
||||
(void)best_rd;
|
||||
*rd_cost = best_rdc;
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
if (!frame_is_intra_only(cm) && best_rdc.rate < INT_MAX &&
|
||||
best_rdc.dist < INT64_MAX) {
|
||||
partition_decisions[pc_tree->partitioning] += 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
|
||||
pc_tree->index != 3) {
|
||||
if (bsize == cm->seq_params.sb_size) {
|
||||
@@ -5643,13 +5669,11 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
|
||||
sb_size, BLOCK_4X4, &dummy_rdc, INT64_MAX, pc_root,
|
||||
NULL);
|
||||
}
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
// TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
|
||||
if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 &&
|
||||
cm->tile_rows == 1) {
|
||||
av1_inter_mode_data_fit(tile_data, x->rdmult);
|
||||
}
|
||||
#endif
|
||||
if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
|
||||
(tile_info->mi_row_end > (mi_row + mib_size))) {
|
||||
if (sb_cols_in_tile == 1)
|
||||
@@ -5805,9 +5829,7 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
|
||||
const TileInfo *const tile_info = &this_tile->tile_info;
|
||||
int mi_row;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
av1_inter_mode_data_init(this_tile);
|
||||
#endif
|
||||
|
||||
av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
|
||||
tile_info->mi_col_end, tile_row);
|
||||
@@ -6350,11 +6372,10 @@ static void encode_frame_internal(AV1_COMP *cpi) {
|
||||
do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
|
||||
!(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
|
||||
TransformationType model;
|
||||
const int64_t ref_frame_error =
|
||||
av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
|
||||
ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
|
||||
cpi->source->y_buffer, cpi->source->y_width,
|
||||
cpi->source->y_height, cpi->source->y_stride);
|
||||
const int64_t ref_frame_error = av1_frame_error(
|
||||
is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
|
||||
ref_buf[frame]->y_stride, cpi->source->y_buffer,
|
||||
cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride);
|
||||
|
||||
if (ref_frame_error == 0) continue;
|
||||
|
||||
@@ -6380,9 +6401,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
|
||||
|
||||
if (tmp_wm_params.wmtype != IDENTITY) {
|
||||
const int64_t warp_error = av1_refine_integerized_param(
|
||||
&tmp_wm_params, tmp_wm_params.wmtype,
|
||||
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
|
||||
ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
|
||||
&tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd),
|
||||
xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
|
||||
ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
|
||||
cpi->source->y_buffer, cpi->source->y_width,
|
||||
cpi->source->y_height, cpi->source->y_stride, 5,
|
||||
@@ -6491,20 +6511,6 @@ void av1_encode_frame(AV1_COMP *cpi) {
|
||||
// rather than the potential full set of 16 transforms
|
||||
cm->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;
|
||||
|
||||
if (cm->show_frame == 0) {
|
||||
int arf_offset = AOMMIN(
|
||||
(MAX_GF_INTERVAL - 1),
|
||||
cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
|
||||
int brf_offset =
|
||||
cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
|
||||
arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
|
||||
current_frame->order_hint = current_frame->frame_number + arf_offset;
|
||||
} else {
|
||||
current_frame->order_hint = current_frame->frame_number;
|
||||
}
|
||||
current_frame->order_hint %=
|
||||
(1 << (cm->seq_params.order_hint_info.order_hint_bits_minus_1 + 1));
|
||||
|
||||
// Make sure segment_id is no larger than last_active_segid.
|
||||
if (cm->seg.enabled && cm->seg.update_map) {
|
||||
const int mi_rows = cm->mi_rows;
|
||||
@@ -6520,7 +6526,9 @@ void av1_encode_frame(AV1_COMP *cpi) {
|
||||
}
|
||||
|
||||
av1_setup_frame_buf_refs(cm);
|
||||
if (cpi->sf.selective_ref_frame >= 3) enforce_max_ref_frames(cpi);
|
||||
if (cpi->sf.selective_ref_frame >= 3 && cpi->oxcf.max_reference_frames == 7) {
|
||||
enforce_max_ref_frames(cpi);
|
||||
}
|
||||
av1_setup_frame_sign_bias(cm);
|
||||
|
||||
#if CONFIG_MISMATCH_DEBUG
|
||||
@@ -6830,7 +6838,8 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
|
||||
xd->block_ref_scale_factors[ref], num_planes);
|
||||
}
|
||||
|
||||
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
|
||||
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
|
||||
av1_num_planes(cm) - 1);
|
||||
if (mbmi->motion_mode == OBMC_CAUSAL) {
|
||||
assert(cpi->oxcf.enable_obmc == 1);
|
||||
av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
|
||||
|
||||
+5
-4
@@ -43,7 +43,7 @@ static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
|
||||
const uint8_t *src8, ptrdiff_t src_stride,
|
||||
const uint8_t *pred8, ptrdiff_t pred_stride) {
|
||||
if (check_subtract_block_size(rows, cols)) {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
|
||||
src_stride, pred8, pred_stride, xd->bd);
|
||||
return;
|
||||
@@ -54,7 +54,7 @@ static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
|
||||
return;
|
||||
}
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
|
||||
pred8, pred_stride, xd->bd);
|
||||
return;
|
||||
@@ -163,6 +163,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
|
||||
qparam.tx_size = tx_size;
|
||||
qparam.qmatrix = qmatrix;
|
||||
qparam.iqmatrix = iqmatrix;
|
||||
qparam.use_quant_b_adapt = cm->use_quant_b_adapt;
|
||||
TxfmParam txfm_param;
|
||||
txfm_param.tx_type = tx_type;
|
||||
txfm_param.tx_size = tx_size;
|
||||
@@ -171,7 +172,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
|
||||
txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
|
||||
|
||||
txfm_param.bd = xd->bd;
|
||||
txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
|
||||
txfm_param.is_hbd = is_cur_buf_hbd(xd);
|
||||
|
||||
av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
|
||||
|
||||
@@ -431,7 +432,7 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
|
||||
|
||||
if (p->eobs[block] > 0) {
|
||||
txfm_param.bd = xd->bd;
|
||||
txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
|
||||
txfm_param.is_hbd = is_cur_buf_hbd(xd);
|
||||
txfm_param.tx_type = DCT_DCT;
|
||||
txfm_param.tx_size = tx_size;
|
||||
txfm_param.eob = p->eobs[block];
|
||||
|
||||
+81
-1290
File diff suppressed because it is too large
Load Diff
+106
-14
@@ -329,6 +329,7 @@ typedef struct AV1EncoderConfig {
|
||||
int enable_order_hint;
|
||||
int enable_dist_wtd_comp;
|
||||
int enable_ref_frame_mvs;
|
||||
unsigned int max_reference_frames;
|
||||
unsigned int allow_ref_frame_mvs;
|
||||
int enable_masked_comp;
|
||||
int enable_interintra_comp;
|
||||
@@ -357,6 +358,9 @@ typedef struct AV1EncoderConfig {
|
||||
unsigned int chroma_subsampling_x;
|
||||
unsigned int chroma_subsampling_y;
|
||||
int reduced_tx_type_set;
|
||||
int use_intra_dct_only;
|
||||
int use_inter_dct_only;
|
||||
int quant_b_adapt;
|
||||
int border_in_pixels;
|
||||
} AV1EncoderConfig;
|
||||
|
||||
@@ -450,7 +454,6 @@ typedef struct FRAME_COUNTS {
|
||||
[SWITCHABLE_FILTERS];
|
||||
} FRAME_COUNTS;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
#define INTER_MODE_RD_DATA_OVERALL_SIZE 6400
|
||||
|
||||
typedef struct {
|
||||
@@ -485,7 +488,6 @@ typedef struct inter_modes_info {
|
||||
int64_t est_rd_arr[MAX_INTER_MODES];
|
||||
RdIdxPair rd_idx_pair_arr[MAX_INTER_MODES];
|
||||
} InterModesInfo;
|
||||
#endif
|
||||
|
||||
// Encoder row synchronization
|
||||
typedef struct AV1RowMTSyncData {
|
||||
@@ -514,9 +516,7 @@ typedef struct TileDataEnc {
|
||||
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
|
||||
FRAME_CONTEXT *row_ctx;
|
||||
uint8_t allow_update_cdf;
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
|
||||
#endif
|
||||
AV1RowMTSync row_mt_sync;
|
||||
AV1RowMTInfo row_mt_info;
|
||||
} TileDataEnc;
|
||||
@@ -551,9 +551,7 @@ typedef struct ThreadData {
|
||||
tran_low_t *tree_coeff_buf[MAX_MB_PLANE];
|
||||
tran_low_t *tree_qcoeff_buf[MAX_MB_PLANE];
|
||||
tran_low_t *tree_dqcoeff_buf[MAX_MB_PLANE];
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
InterModesInfo *inter_modes_info;
|
||||
#endif
|
||||
uint32_t *hash_value_buffer[2][2];
|
||||
int32_t *wsrc_buf;
|
||||
int32_t *mask_buf;
|
||||
@@ -595,6 +593,15 @@ typedef struct {
|
||||
YV12_BUFFER_CONFIG buf;
|
||||
} EncRefCntBuffer;
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
// Counters collected by the partition search when
// CONFIG_COLLECT_PARTITION_STATS is enabled. All of them are only updated on
// frames that are not intra-only. For the two 2-D arrays the first index is
// the value returned by av1_get_bsize_idx_for_part_stats(bsize) and the
// second index is a partition type.
typedef struct PartitionStats {
|
||||
// Incremented for the partition type finally stored in
// pc_tree->partitioning, provided the best RD stats are valid
// (rate < INT_MAX and dist < INT64_MAX).
int partition_decisions[6][EXT_PARTITION_TYPES];
|
||||
// Incremented when a partition type is about to be evaluated and the
// remaining RD budget (best cost minus the partition signalling cost) is
// non-negative.
int partition_attempts[6][EXT_PARTITION_TYPES];
|
||||
|
||||
// Incremented each time the search restarts at BEGIN_PARTITION_SEARCH
// because no valid partition was found.
int partition_redo;
|
||||
} PartitionStats;
|
||||
#endif
|
||||
|
||||
typedef struct AV1_COMP {
|
||||
QUANTS quants;
|
||||
ThreadData td;
|
||||
@@ -658,14 +665,12 @@ typedef struct AV1_COMP {
|
||||
// frame of the same type as the current frame).
|
||||
int fb_of_context_type[REF_FRAMES];
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
// When true, a new rule for backward (future) reference frames is in effect:
|
||||
// - BWDREF_FRAME is always the closest future frame available
|
||||
// - ALTREF2_FRAME is always the 2nd closest future frame available
|
||||
// - 'refresh_bwd_ref_frame' flag is used for updating both the BWDREF_FRAME
|
||||
// and ALTREF2_FRAME. ('refresh_alt2_ref_frame' flag is irrelevant).
|
||||
int new_bwdref_update_rule;
|
||||
#endif
|
||||
|
||||
int ext_refresh_frame_flags_pending;
|
||||
int ext_refresh_last_frame;
|
||||
@@ -718,6 +723,9 @@ typedef struct AV1_COMP {
|
||||
int ref_frame_flags;
|
||||
int ext_ref_frame_flags;
|
||||
|
||||
// speed is passed as a per-frame parameter into the encoder
|
||||
int speed;
|
||||
// sf contains fine-grained config set internally based on speed
|
||||
SPEED_FEATURES sf;
|
||||
|
||||
unsigned int max_mv_magnitude;
|
||||
@@ -865,18 +873,35 @@ typedef struct AV1_COMP {
|
||||
#endif
|
||||
// Set if screen content is set or relevant tools are enabled
|
||||
int is_screen_content_type;
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
PartitionStats partition_stats;
|
||||
#endif
|
||||
} AV1_COMP;
|
||||
|
||||
// Per-frame input bundle that is passed (as frame_input) to av1_encode()
// together with the EncodeFrameParams, and to av1_tpl_setup_stats().
typedef struct {
|
||||
// NOTE(review): presumably the source frame to be encoded -- confirm.
YV12_BUFFER_CONFIG *source;
|
||||
// NOTE(review): presumably the previous source frame -- confirm.
YV12_BUFFER_CONFIG *last_source;
|
||||
// NOTE(review): presumably the frame duration in timestamp ticks -- confirm.
int64_t ts_duration;
|
||||
} EncodeFrameInput;
|
||||
|
||||
// EncodeFrameParams contains per-frame encoding parameters decided upon by
|
||||
// av1_encode_strategy() and passed down to av1_encode()
|
||||
typedef struct {
|
||||
struct EncodeFrameParams {
|
||||
int error_resilient_mode;
|
||||
FRAME_TYPE frame_type;
|
||||
int primary_ref_frame;
|
||||
int order_offset;
|
||||
int show_frame;
|
||||
|
||||
// This is a bitmask of which reference slots can be used in this frame
|
||||
int ref_frame_flags;
|
||||
|
||||
// Speed level to use for this frame: Bigger number means faster.
|
||||
int speed;
|
||||
|
||||
unsigned int *frame_flags;
|
||||
} EncodeFrameParams;
|
||||
};
|
||||
typedef struct EncodeFrameParams EncodeFrameParams;
|
||||
|
||||
// EncodeFrameResults contains information about the result of encoding a
|
||||
// single frame
|
||||
@@ -905,6 +930,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
|
||||
const aom_rational_t *timebase);
|
||||
|
||||
int av1_encode(AV1_COMP *const cpi, uint8_t *const dest,
|
||||
const EncodeFrameInput *const frame_input,
|
||||
const EncodeFrameParams *const frame_params,
|
||||
EncodeFrameResults *const frame_results);
|
||||
|
||||
@@ -918,12 +944,12 @@ aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
|
||||
|
||||
int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags);
|
||||
|
||||
void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags);
|
||||
|
||||
int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
void av1_set_frame_size(AV1_COMP *cpi, int width, int height);
|
||||
|
||||
int av1_update_entropy(AV1_COMP *cpi, int update);
|
||||
|
||||
int av1_set_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
|
||||
@@ -937,8 +963,19 @@ int av1_get_quantizer(struct AV1_COMP *cpi);
|
||||
|
||||
int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
|
||||
|
||||
int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n);
|
||||
int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n);
|
||||
// AV1 timestamps are measured in ticks; there are 10,000,000 ticks per second.
|
||||
#define TICKS_PER_SEC 10000000LL
|
||||
|
||||
// Converts a count `n` of timebase units into encoder ticks, where one second
// is TICKS_PER_SEC ticks. The product n * TICKS_PER_SEC * num is formed first
// and then divided by den (integer division).
static INLINE int64_t timebase_units_to_ticks(const aom_rational_t *timebase,
                                              int64_t n) {
  const int64_t scaled_units = n * TICKS_PER_SEC * timebase->num;
  return scaled_units / timebase->den;
}
|
||||
|
||||
// Converts a count `n` of encoder ticks (TICKS_PER_SEC per second) back into
// timebase units. A bias of (TICKS_PER_SEC * num / 2 - 1) is added before the
// two integer divisions so the result is rounded rather than truncated.
static INLINE int64_t ticks_to_timebase_units(const aom_rational_t *timebase,
                                              int64_t n) {
  const int64_t rounding_bias = TICKS_PER_SEC * timebase->num / 2 - 1;
  const int64_t biased_ticks = n * timebase->den + rounding_bias;
  return biased_ticks / timebase->num / TICKS_PER_SEC;
}
|
||||
|
||||
static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
|
||||
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
|
||||
@@ -975,6 +1012,13 @@ static INLINE int enc_is_ref_frame_buf(const AV1_COMMON *const cm,
|
||||
return (ref_frame <= ALTREF_FRAME);
|
||||
}
|
||||
|
||||
// Prepares reference buffer `buf` for the current frame: calls
// ensure_mv_buffer(buf, cm) and then records the current frame dimensions
// (cm->width, cm->height) on the buffer. `buf` must be non-NULL (asserted).
// NOTE(review): ensure_mv_buffer presumably (re)allocates the buffer's
// motion-vector storage when needed; it is defined outside this view, confirm.
static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, RefCntBuffer *buf) {
|
||||
assert(buf != NULL);
|
||||
ensure_mv_buffer(buf, cm);
|
||||
buf->width = cm->width;
|
||||
buf->height = cm->height;
|
||||
}
|
||||
|
||||
// Token buffer is only used for palette tokens.
|
||||
static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
|
||||
int sb_size_log2,
|
||||
@@ -1046,6 +1090,8 @@ static INLINE int *cond_cost_list(const struct AV1_COMP *cpi, int *cost_list) {
|
||||
|
||||
void av1_new_framerate(AV1_COMP *cpi, double framerate);
|
||||
|
||||
void av1_setup_frame_size(AV1_COMP *cpi);
|
||||
|
||||
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
|
||||
|
||||
// Returns 1 if a frame is scaled and 0 otherwise.
|
||||
@@ -1077,6 +1123,52 @@ static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
|
||||
// field.
|
||||
aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
|
||||
|
||||
#if CONFIG_COLLECT_PARTITION_STATS
|
||||
// Dumps the collected partition statistics to "partition_stats.csv" in the
// current working directory, overwriting any existing file. Silently returns
// if the file cannot be opened.
//
// Layout: one header row, then one row per square block size (128 down to 4).
// Each data row holds the block size, part_stats->partition_redo (the same
// value is repeated on every row), the per-partition-type decision counts and
// the per-partition-type attempt counts.
static INLINE void av1_print_partition_stats(PartitionStats *part_stats) {
  // Block dimension printed for each row of the stats tables.
  const int block_dims[6] = { 128, 64, 32, 16, 8, 4 };
  const int num_rows = (int)(sizeof(block_dims) / sizeof(block_dims[0]));
  int row, type;

  FILE *const csv = fopen("partition_stats.csv", "w");
  if (csv == NULL) return;

  // Header row: all decision columns first, then all attempt columns.
  fprintf(csv, "bsize,redo,");
  for (type = 0; type < EXT_PARTITION_TYPES; ++type)
    fprintf(csv, "decision_%d,", type);
  for (type = 0; type < EXT_PARTITION_TYPES; ++type)
    fprintf(csv, "attempt_%d,", type);
  fprintf(csv, "\n");

  // One data row per block size, in the same column order as the header.
  for (row = 0; row < num_rows; ++row) {
    fprintf(csv, "%d,%d,", block_dims[row], part_stats->partition_redo);
    for (type = 0; type < EXT_PARTITION_TYPES; ++type)
      fprintf(csv, "%d,", part_stats->partition_decisions[row][type]);
    for (type = 0; type < EXT_PARTITION_TYPES; ++type)
      fprintf(csv, "%d,", part_stats->partition_attempts[row][type]);
    fprintf(csv, "\n");
  }

  fclose(csv);
}
|
||||
|
||||
// Maps a square block size to its row index in the partition statistics
// tables: 128x128 -> 0, 64x64 -> 1, 32x32 -> 2, 16x16 -> 3, 8x8 -> 4,
// 4x4 -> 5. This is the same ordering as the { 128, 64, 32, 16, 8, 4 } size
// table used by av1_print_partition_stats().
//
// Any other block size is a caller error: it trips an assert in debug builds
// and yields -1 otherwise.
static INLINE int av1_get_bsize_idx_for_part_stats(BLOCK_SIZE bsize) {
  // BLOCK_4X4 is listed here because the switch below (and the stats tables)
  // explicitly support it; without it the assert would fire on a valid input.
  assert(bsize == BLOCK_128X128 || bsize == BLOCK_64X64 ||
         bsize == BLOCK_32X32 || bsize == BLOCK_16X16 || bsize == BLOCK_8X8 ||
         bsize == BLOCK_4X4);
  switch (bsize) {
    case BLOCK_128X128: return 0;
    case BLOCK_64X64: return 1;
    case BLOCK_32X32: return 2;
    case BLOCK_16X16: return 3;
    case BLOCK_8X8: return 4;
    case BLOCK_4X4: return 5;
    default: assert(0 && "Invalid bsize for partition_stats."); return -1;
  }
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
+94
-34
@@ -284,6 +284,17 @@ static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
|
||||
return av1_cost_literal(1);
|
||||
}
|
||||
|
||||
// Precomputed Golomb cost for r in [0, 32), used by get_br_cost_with_diff()
// to avoid calling get_golomb_cost() for small r. Entry 0 is 0; for r >= 1
// the entry is 512 * (2 * floor(log2(r)) + 1), i.e. 1, 3, 5, 7, 9 units of
// 512 for r in [1], [2,3], [4,7], [8,15], [16,31].
// NOTE(review): 512 is presumably the cost of one literal bit
// (av1_cost_literal(1)); confirm against the cost definitions.
static const int golomb_bits_cost[32] = {
|
||||
0, 512, 512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5,
|
||||
512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7,
|
||||
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9,
|
||||
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9
|
||||
};
|
||||
// Step in Golomb cost between consecutive r values, for r in [0, 32):
// golomb_cost_diff[r] == golomb_bits_cost[r] - golomb_bits_cost[r - 1] for
// r >= 1 (entry 0 is 0). The entry is non-zero only where r is a power of
// two (1, 2, 4, 8, 16), which is where the Golomb code length grows.
static const int golomb_cost_diff[32] = {
|
||||
0, 512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0,
|
||||
512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
static INLINE int get_golomb_cost(int abs_qc) {
|
||||
if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
|
||||
const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
|
||||
@@ -293,6 +304,27 @@ static INLINE int get_golomb_cost(int abs_qc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the base-range cost of coding `level` (coeff_lps[] entry plus any
// Golomb cost) and, in addition, accumulates into *diff a cost delta for the
// same level.
//
//   level     - absolute coefficient level being costed.
//   coeff_lps - cost row; entries [0, COEFF_BASE_RANGE] are read as costs and
//               entries from COEFF_BASE_RANGE + 1 onward as per-level deltas.
//   diff      - in/out accumulator; this function only ever adds to it.
//
// NOTE(review): the accumulated delta is presumably cost(level) minus
// cost(level - 1), since callers subtract it from the cost to obtain the cost
// of the next lower level; confirm against how the lps_cost table is filled.
static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
                                        int *diff) {
  // First level at which a Golomb suffix is coded.
  const int golomb_start = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
  const int lps_idx = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
  int golomb_cost = 0;

  // Up to and including the first Golomb level, the delta comes from the
  // second half of the cost row.
  if (level <= golomb_start) *diff += coeff_lps[lps_idx + COEFF_BASE_RANGE + 1];

  if (level >= golomb_start) {
    const int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    if (r >= 32) {
      // Outside the lookup tables: compute the cost directly. The cost steps
      // up by 1024 exactly when r is a power of two.
      golomb_cost = get_golomb_cost(level);
      if ((r & (r - 1)) == 0) *diff += 1024;
    } else {
      golomb_cost = golomb_bits_cost[r];
      *diff += golomb_cost_diff[r];
    }
  }

  return golomb_cost + coeff_lps[lps_idx];
}
|
||||
|
||||
static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
|
||||
const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
|
||||
return coeff_lps[base_range] + get_golomb_cost(level);
|
||||
@@ -732,7 +764,8 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb(
|
||||
|
||||
av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
|
||||
|
||||
const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost;
|
||||
const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
|
||||
coeff_costs->lps_cost;
|
||||
int c = eob - 1;
|
||||
{
|
||||
const int pos = scan[c];
|
||||
@@ -758,7 +791,7 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb(
|
||||
}
|
||||
}
|
||||
}
|
||||
const int(*base_cost)[4] = coeff_costs->base_cost;
|
||||
const int(*base_cost)[8] = coeff_costs->base_cost;
|
||||
for (c = eob - 2; c >= 1; --c) {
|
||||
const int pos = scan[c];
|
||||
const int coeff_ctx = coeff_contexts[pos];
|
||||
@@ -1262,21 +1295,28 @@ static int hbt_create_hashes(TxbInfo *txb_info,
|
||||
txb_eob_costs, p, block, fast_mode, rate_cost);
|
||||
}
|
||||
|
||||
static AOM_FORCE_INLINE int get_coeff_cost_simple(
|
||||
static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
|
||||
int ci, tran_low_t abs_qc, int coeff_ctx,
|
||||
const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
|
||||
const uint8_t *levels) {
|
||||
const uint8_t *levels, int *cost_low) {
|
||||
// this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
|
||||
// and not the last (scan_idx != eob - 1)
|
||||
assert(ci > 0);
|
||||
int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
|
||||
int diff = 0;
|
||||
if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
|
||||
if (abs_qc) {
|
||||
cost += av1_cost_literal(1);
|
||||
if (abs_qc > NUM_BASE_LEVELS) {
|
||||
const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
|
||||
cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
|
||||
int brcost_diff = 0;
|
||||
cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
|
||||
&brcost_diff);
|
||||
diff += brcost_diff;
|
||||
}
|
||||
}
|
||||
*cost_low = cost - diff;
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
@@ -1369,13 +1409,23 @@ static INLINE void update_coeff_general(
|
||||
const int64_t rd = RDCOST(rdmult, rate, dist);
|
||||
|
||||
tran_low_t qc_low, dqc_low;
|
||||
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
|
||||
const tran_low_t abs_qc_low = abs_qc - 1;
|
||||
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
|
||||
const int rate_low =
|
||||
get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
|
||||
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
|
||||
const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
|
||||
tran_low_t abs_qc_low;
|
||||
int64_t dist_low, rd_low;
|
||||
int rate_low;
|
||||
if (abs_qc == 1) {
|
||||
abs_qc_low = qc_low = dqc_low = 0;
|
||||
dist_low = dist0;
|
||||
rate_low = txb_costs->base_cost[coeff_ctx][0];
|
||||
} else {
|
||||
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
|
||||
abs_qc_low = abs_qc - 1;
|
||||
dist_low = get_coeff_dist(tqc, dqc_low, shift);
|
||||
rate_low =
|
||||
get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
|
||||
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
|
||||
}
|
||||
|
||||
rd_low = RDCOST(rdmult, rate_low, dist_low);
|
||||
if (rd_low < rd) {
|
||||
qcoeff[ci] = qc_low;
|
||||
dqcoeff[ci] = dqc_low;
|
||||
@@ -1409,28 +1459,28 @@ static AOM_FORCE_INLINE void update_coeff_simple(
|
||||
*accu_rate += txb_costs->base_cost[coeff_ctx][0];
|
||||
} else {
|
||||
const tran_low_t abs_qc = abs(qc);
|
||||
const tran_low_t tqc = tcoeff[ci];
|
||||
const tran_low_t dqc = dqcoeff[ci];
|
||||
const int rate = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs,
|
||||
bwl, tx_class, levels);
|
||||
if (abs(dqc) < abs(tqc)) {
|
||||
const tran_low_t abs_tqc = abs(tcoeff[ci]);
|
||||
const tran_low_t abs_dqc = abs(dqcoeff[ci]);
|
||||
int rate_low = 0;
|
||||
const int rate = get_two_coeff_cost_simple(
|
||||
ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
|
||||
if (abs_dqc < abs_tqc) {
|
||||
*accu_rate += rate;
|
||||
return;
|
||||
}
|
||||
const int64_t dist = get_coeff_dist(tqc, dqc, shift);
|
||||
|
||||
const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
|
||||
const int64_t rd = RDCOST(rdmult, rate, dist);
|
||||
|
||||
const int sign = (qc < 0) ? 1 : 0;
|
||||
tran_low_t qc_low, dqc_low;
|
||||
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
|
||||
const tran_low_t abs_qc_low = abs_qc - 1;
|
||||
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
|
||||
const int rate_low = get_coeff_cost_simple(
|
||||
ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels);
|
||||
const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
|
||||
const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
|
||||
const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
|
||||
|
||||
if (rd_low < rd) {
|
||||
qcoeff[ci] = qc_low;
|
||||
dqcoeff[ci] = dqc_low;
|
||||
const int sign = (qc < 0) ? 1 : 0;
|
||||
qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
|
||||
dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
|
||||
levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
|
||||
*accu_rate += rate_low;
|
||||
} else {
|
||||
@@ -1468,14 +1518,24 @@ static AOM_FORCE_INLINE void update_coeff_eob(
|
||||
int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
|
||||
|
||||
tran_low_t qc_low, dqc_low;
|
||||
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
|
||||
const tran_low_t abs_qc_low = abs_qc - 1;
|
||||
const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
|
||||
const int rate_low =
|
||||
get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx,
|
||||
txb_costs, bwl, tx_class, levels);
|
||||
const int64_t rd_low =
|
||||
RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
|
||||
tran_low_t abs_qc_low;
|
||||
int64_t dist_low, rd_low;
|
||||
int rate_low;
|
||||
if (abs_qc == 1) {
|
||||
abs_qc_low = 0;
|
||||
dqc_low = qc_low = 0;
|
||||
dist_low = 0;
|
||||
rate_low = txb_costs->base_cost[coeff_ctx][0];
|
||||
rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
|
||||
} else {
|
||||
get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
|
||||
abs_qc_low = abs_qc - 1;
|
||||
dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
|
||||
rate_low =
|
||||
get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
|
||||
dc_sign_ctx, txb_costs, bwl, tx_class, levels);
|
||||
rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
|
||||
}
|
||||
|
||||
int lower_level_new_eob = 0;
|
||||
const int new_eob = si + 1;
|
||||
|
||||
-6
@@ -421,11 +421,9 @@ static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
|
||||
(int32_t *)aom_memalign(
|
||||
16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
|
||||
(InterModesInfo *)aom_malloc(
|
||||
sizeof(*thread_data->td->inter_modes_info)));
|
||||
#endif
|
||||
|
||||
for (int x = 0; x < 2; x++)
|
||||
for (int y = 0; y < 2; y++)
|
||||
@@ -544,9 +542,7 @@ static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
|
||||
thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
|
||||
thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
thread_data->td->mb.inter_modes_info = thread_data->td->inter_modes_info;
|
||||
#endif
|
||||
for (int x = 0; x < 2; x++) {
|
||||
for (int y = 0; y < 2; y++) {
|
||||
memcpy(thread_data->td->hash_value_buffer[x][y],
|
||||
@@ -662,9 +658,7 @@ void av1_encode_tiles_row_mt(AV1_COMP *cpi) {
|
||||
this_tile->row_mt_info.current_mi_row = this_tile->tile_info.mi_row_start;
|
||||
this_tile->row_mt_info.num_threads_working = 0;
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
av1_inter_mode_data_init(this_tile);
|
||||
#endif
|
||||
av1_zero_above_context(cm, &cpi->td.mb.e_mbd,
|
||||
this_tile->tile_info.mi_col_start,
|
||||
this_tile->tile_info.mi_col_end, tile_row);
|
||||
|
||||
+50
-353
@@ -36,6 +36,7 @@
|
||||
#include "av1/encoder/encodemb.h"
|
||||
#include "av1/encoder/encodemv.h"
|
||||
#include "av1/encoder/encoder.h"
|
||||
#include "av1/encoder/encode_strategy.h"
|
||||
#include "av1/encoder/extend.h"
|
||||
#include "av1/encoder/firstpass.h"
|
||||
#include "av1/encoder/mcomp.h"
|
||||
@@ -380,7 +381,7 @@ static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
// Override the default variance function to use MSE.
|
||||
v_fn_ptr.vf = get_block_variance_fn(bsize);
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
|
||||
}
|
||||
|
||||
@@ -449,18 +450,6 @@ static int find_fp_qindex(aom_bit_depth_t bit_depth) {
|
||||
return i;
|
||||
}
|
||||
|
||||
static void set_first_pass_params(AV1_COMP *cpi) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
if (!cpi->refresh_alt_ref_frame && (cm->current_frame.frame_number == 0 ||
|
||||
(cpi->frame_flags & FRAMEFLAGS_KEY))) {
|
||||
cm->current_frame.frame_type = KEY_FRAME;
|
||||
} else {
|
||||
cm->current_frame.frame_type = INTER_FRAME;
|
||||
}
|
||||
// Do not use periodic key frames.
|
||||
cpi->rc.frames_to_key = INT_MAX;
|
||||
}
|
||||
|
||||
static double raw_motion_error_stdev(int *raw_motion_err_list,
|
||||
int raw_motion_err_counts) {
|
||||
int64_t sum_raw_err = 0;
|
||||
@@ -486,7 +475,7 @@ static double raw_motion_error_stdev(int *raw_motion_err_list,
|
||||
|
||||
#define UL_INTRA_THRESH 50
|
||||
#define INVALID_ROW -1
|
||||
void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
|
||||
int mb_row, mb_col;
|
||||
MACROBLOCK *const x = &cpi->td.mb;
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
@@ -558,7 +547,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
brightness_factor = 0.0;
|
||||
neutral_count = 0.0;
|
||||
|
||||
set_first_pass_params(cpi);
|
||||
// Do not use periodic key frames.
|
||||
cpi->rc.frames_to_key = INT_MAX;
|
||||
|
||||
av1_set_quantizer(cm, qindex);
|
||||
|
||||
av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x,
|
||||
@@ -701,14 +692,15 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
// Accumulate the intra error.
|
||||
intra_error += (int64_t)this_error;
|
||||
|
||||
int stride = x->plane[0].src.stride;
|
||||
const int hbd = is_cur_buf_hbd(xd);
|
||||
const int stride = x->plane[0].src.stride;
|
||||
uint8_t *buf = x->plane[0].src.buf;
|
||||
for (int r8 = 0; r8 < 2; ++r8)
|
||||
for (int r8 = 0; r8 < 2; ++r8) {
|
||||
for (int c8 = 0; c8 < 2; ++c8) {
|
||||
int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
|
||||
frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
|
||||
buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_FP_MB_STATS
|
||||
if (cpi->use_fp_mb_stats) {
|
||||
@@ -730,7 +722,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
struct buf_2d unscaled_last_source_buf_2d;
|
||||
|
||||
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
|
||||
} else {
|
||||
@@ -745,7 +737,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
cpi->unscaled_last_source->y_buffer + recon_yoffset;
|
||||
unscaled_last_source_buf_2d.stride =
|
||||
cpi->unscaled_last_source->y_stride;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
raw_motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
|
||||
} else {
|
||||
@@ -777,7 +769,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
int gf_motion_error;
|
||||
|
||||
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
gf_motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
|
||||
} else {
|
||||
@@ -854,8 +846,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
xd->mi[0]->tx_size = TX_4X4;
|
||||
xd->mi[0]->ref_frame[0] = LAST_FRAME;
|
||||
xd->mi[0]->ref_frame[1] = NONE_FRAME;
|
||||
av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale,
|
||||
mb_col * mb_scale, NULL, bsize);
|
||||
av1_enc_build_inter_predictor(cm, xd, mb_row * mb_scale,
|
||||
mb_col * mb_scale, NULL, bsize,
|
||||
AOM_PLANE_Y, AOM_PLANE_Y);
|
||||
av1_encode_sby_pass1(cm, x, bsize);
|
||||
sum_mvr += mv.row;
|
||||
sum_mvr_abs += abs(mv.row);
|
||||
@@ -1038,7 +1031,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
|
||||
// TODO(paulwilkins): Handle the case when duration is set to 0, or
|
||||
// something less than the full time between subsequent values of
|
||||
// cpi->source_time_stamp.
|
||||
fps.duration = (double)(source->ts_end - source->ts_start);
|
||||
fps.duration = (double)ts_duration;
|
||||
|
||||
// Don't want to do output stats with a stack variable!
|
||||
twopass->this_frame_stats = fps;
|
||||
@@ -1566,7 +1559,6 @@ static int calculate_boost_bits(int frame_count, int boost,
|
||||
0);
|
||||
}
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
// #define CHCEK_GF_PARAMETER
|
||||
#ifdef CHCEK_GF_PARAMETER
|
||||
void check_frame_params(GF_GROUP *const gf_group, int gf_interval,
|
||||
@@ -1693,7 +1685,6 @@ static int construct_multi_layer_gf_structure(GF_GROUP *const gf_group,
|
||||
// Given the maximum allowed height of the pyramid structure, return the maximum
|
||||
// GF length supported by the same.
|
||||
static INLINE int get_max_gf_length(int max_pyr_height) {
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
// We allow a frame to have at most two left/right descendants before changing
|
||||
// them into a subtree, i.e., we allow the following structure:
|
||||
/* OUT_OF_ORDER_FRAME
|
||||
@@ -1710,9 +1701,6 @@ static INLINE int get_max_gf_length(int max_pyr_height) {
|
||||
return MAX_GF_INTERVAL; // Special case: uses the old pyramid structure.
|
||||
default: assert(0 && "Invalid max_pyr_height"); return -1;
|
||||
}
|
||||
#else
|
||||
return 16;
|
||||
#endif // CONFIG_FIX_GF_LENGTH
|
||||
}
|
||||
|
||||
// Given the maximum allowed height of the pyramid structure, return the fixed
|
||||
@@ -1722,11 +1710,12 @@ int av1_rc_get_fixed_gf_length(int max_pyr_height) {
|
||||
return AOMMIN(max_gf_length_allowed, MAX_GF_INTERVAL);
|
||||
}
|
||||
|
||||
static void define_customized_gf_group_structure(AV1_COMP *cpi) {
|
||||
static void define_customized_gf_group_structure(
|
||||
AV1_COMP *cpi, const EncodeFrameParams *const frame_params) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
|
||||
const int key_frame = frame_params->frame_type == KEY_FRAME;
|
||||
|
||||
assert(rc->baseline_gf_interval >= MIN_GF_INTERVAL &&
|
||||
rc->baseline_gf_interval <=
|
||||
@@ -1796,142 +1785,11 @@ static void define_customized_gf_group_structure(AV1_COMP *cpi) {
|
||||
// It is an example of how to define a GF structure manually. The function will
|
||||
// result in exactly the same GF group structure as
|
||||
// define_customized_gf_group_structure() when rc->baseline_gf_interval == 4
|
||||
#if USE_MANUAL_GF4_STRUCT
|
||||
#define GF_INTERVAL_4 4
|
||||
static const unsigned char gf4_multi_layer_params[][GF_FRAME_PARAMS] = {
|
||||
{
|
||||
// gf_group->index == 0 (Frame 0)
|
||||
// It can also be KEY frame. Will assign the proper value
|
||||
// in define_gf_group_structure
|
||||
OVERLAY_UPDATE, // update_type (default value)
|
||||
0, // arf_src_offset
|
||||
0, // arf_pos_in_gf
|
||||
0 // arf_update_idx
|
||||
},
|
||||
{
|
||||
// gf_group->index == 1 (Frame 4)
|
||||
ARF_UPDATE, // update_type
|
||||
GF_INTERVAL_4 - 1, // arf_src_offset
|
||||
0, // arf_pos_in_gf
|
||||
0 // arf_update_idx
|
||||
},
|
||||
{
|
||||
// gf_group->index == 2 (Frame 2)
|
||||
INTNL_ARF_UPDATE, // update_type
|
||||
(GF_INTERVAL_4 >> 1) - 1, // arf_src_offset
|
||||
0, // arf_pos_in_gf
|
||||
0 // arf_update_idx
|
||||
},
|
||||
{
|
||||
// gf_group->index == 3 (Frame 1)
|
||||
LAST_BIPRED_UPDATE, // update_type
|
||||
0, // arf_src_offset
|
||||
0, // arf_pos_in_gf
|
||||
0 // arf_update_idx
|
||||
},
|
||||
|
||||
{
|
||||
// gf_group->index == 4 (Frame 2 - OVERLAY)
|
||||
INTNL_OVERLAY_UPDATE, // update_type
|
||||
0, // arf_src_offset
|
||||
2, // arf_pos_in_gf
|
||||
0 // arf_update_idx
|
||||
},
|
||||
{
|
||||
// gf_group->index == 5 (Frame 3)
|
||||
LF_UPDATE, // update_type
|
||||
0, // arf_src_offset
|
||||
0, // arf_pos_in_gf
|
||||
1 // arf_update_idx
|
||||
}
|
||||
};
|
||||
|
||||
static int define_gf_group_structure_4(AV1_COMP *cpi) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
|
||||
|
||||
assert(rc->baseline_gf_interval == GF_INTERVAL_4);
|
||||
|
||||
const int gf_update_frames = rc->baseline_gf_interval + 2;
|
||||
int frame_index;
|
||||
|
||||
for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
|
||||
int param_idx = 0;
|
||||
|
||||
gf_group->bidir_pred_enabled[frame_index] = 0;
|
||||
|
||||
if (frame_index == 0) {
|
||||
// gf_group->arf_src_offset[frame_index] = 0;
|
||||
gf_group->brf_src_offset[frame_index] = 0;
|
||||
gf_group->bidir_pred_enabled[frame_index] = 0;
|
||||
|
||||
// For key frames the frame target rate is already set and it
|
||||
// is also the golden frame.
|
||||
if (key_frame) continue;
|
||||
|
||||
gf_group->update_type[frame_index] =
|
||||
gf4_multi_layer_params[frame_index][param_idx++];
|
||||
|
||||
if (rc->source_alt_ref_active) {
|
||||
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
|
||||
} else {
|
||||
gf_group->update_type[frame_index] = GF_UPDATE;
|
||||
}
|
||||
param_idx++;
|
||||
} else {
|
||||
gf_group->update_type[frame_index] =
|
||||
gf4_multi_layer_params[frame_index][param_idx++];
|
||||
}
|
||||
|
||||
// setup other parameters
|
||||
gf_group->rf_level[frame_index] =
|
||||
update_type_2_rf_level(gf_group->update_type[frame_index]);
|
||||
|
||||
// == arf_src_offset ==
|
||||
gf_group->arf_src_offset[frame_index] =
|
||||
gf4_multi_layer_params[frame_index][param_idx++];
|
||||
|
||||
// == arf_pos_in_gf ==
|
||||
gf_group->arf_pos_in_gf[frame_index] =
|
||||
gf4_multi_layer_params[frame_index][param_idx++];
|
||||
|
||||
// == arf_update_idx ==
|
||||
gf_group->brf_src_offset[frame_index] =
|
||||
gf4_multi_layer_params[frame_index][param_idx];
|
||||
}
|
||||
|
||||
// NOTE: We need to configure the frame at the end of the sequence + 1 that
|
||||
// will be the start frame for the next group. Otherwise prior to the
|
||||
// call to av1_rc_get_second_pass_params() the data will be undefined.
|
||||
gf_group->arf_update_idx[frame_index] = 0;
|
||||
gf_group->arf_ref_idx[frame_index] = 0;
|
||||
|
||||
if (rc->source_alt_ref_pending) {
|
||||
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
|
||||
gf_group->rf_level[frame_index] = INTER_NORMAL;
|
||||
|
||||
} else {
|
||||
gf_group->update_type[frame_index] = GF_UPDATE;
|
||||
gf_group->rf_level[frame_index] = GF_ARF_STD;
|
||||
}
|
||||
|
||||
gf_group->bidir_pred_enabled[frame_index] = 0;
|
||||
gf_group->brf_src_offset[frame_index] = 0;
|
||||
|
||||
// This value is only used for INTNL_OVERLAY_UPDATE
|
||||
gf_group->arf_pos_in_gf[frame_index] = 0;
|
||||
|
||||
return gf_update_frames;
|
||||
}
|
||||
#endif // USE_MANUAL_GF4_STRUCT
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
|
||||
static void define_gf_group_structure(AV1_COMP *cpi) {
|
||||
static void define_gf_group_structure(
|
||||
AV1_COMP *cpi, const EncodeFrameParams *const frame_params) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
const int max_pyr_height = cpi->oxcf.gf_max_pyr_height;
|
||||
const int valid_customized_gf_length =
|
||||
max_pyr_height >= MIN_PYRAMID_LVL && max_pyr_height <= MAX_PYRAMID_LVL &&
|
||||
@@ -1940,24 +1798,18 @@ static void define_gf_group_structure(AV1_COMP *cpi) {
|
||||
// used the new structure only if extra_arf is allowed
|
||||
if (valid_customized_gf_length && rc->source_alt_ref_pending &&
|
||||
cpi->extra_arf_allowed > 0) {
|
||||
#if USE_MANUAL_GF4_STRUCT
|
||||
if (rc->baseline_gf_interval == 4)
|
||||
define_gf_group_structure_4(cpi);
|
||||
else
|
||||
#endif
|
||||
define_customized_gf_group_structure(cpi);
|
||||
define_customized_gf_group_structure(cpi, frame_params);
|
||||
cpi->new_bwdref_update_rule = 1;
|
||||
return;
|
||||
} else {
|
||||
cpi->new_bwdref_update_rule = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
int i;
|
||||
int frame_index = 0;
|
||||
const int key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
|
||||
const int key_frame = frame_params->frame_type == KEY_FRAME;
|
||||
|
||||
// The use of bi-predictive frames are only enabled when following 3
|
||||
// conditions are met:
|
||||
@@ -2168,35 +2020,28 @@ static void define_gf_group_structure(AV1_COMP *cpi) {
|
||||
gf_group->brf_src_offset[frame_index] = 0;
|
||||
}
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
#define NEW_MULTI_LVL_BOOST_VBR_ALLOC 1
|
||||
|
||||
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
#define LEAF_REDUCTION_FACTOR 0.75
|
||||
// Per-level share of the bit budget taken back from leaf frames. The table is
// indexed as [gf_group->pyramid_height - 2][dist2top] by the GF-group bit
// allocation code in this file. Each row's entries sum to 1.0, so a row
// splits the whole reclaimed budget across the pyramid levels it covers.
static double lvl_budget_factor[MAX_PYRAMID_LVL - 1][MAX_PYRAMID_LVL - 1] = {
|
||||
{ 1.0, 0.0, 0.0 }, { 0.6, 0.4, 0 }, { 0.45, 0.35, 0.20 }
|
||||
};
|
||||
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
double group_error, int gf_arf_bits) {
|
||||
static void allocate_gf_group_bits(
|
||||
AV1_COMP *cpi, int64_t gf_group_bits, double group_error, int gf_arf_bits,
|
||||
const EncodeFrameParams *const frame_params) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
int i;
|
||||
int frame_index = 0;
|
||||
int key_frame;
|
||||
const int key_frame = frame_params->frame_type == KEY_FRAME;
|
||||
const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
|
||||
int64_t total_group_bits = gf_group_bits;
|
||||
int ext_arf_boost[MAX_EXT_ARFS];
|
||||
|
||||
define_gf_group_structure(cpi);
|
||||
define_gf_group_structure(cpi, frame_params);
|
||||
|
||||
av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
|
||||
|
||||
key_frame = cpi->common.current_frame.frame_type == KEY_FRAME;
|
||||
|
||||
// For key frames the frame target rate is already set and it
|
||||
// is also the golden frame.
|
||||
// === [frame_index == 0] ===
|
||||
@@ -2232,13 +2077,9 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
// Save.
|
||||
const int tmp_frame_index = frame_index;
|
||||
int budget_reduced_from_leaf_level = 0;
|
||||
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
|
||||
// Allocate bits to the other frames in the group.
|
||||
const int normal_frames =
|
||||
@@ -2269,7 +2110,6 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
// TODO(zoeliu): To investigate whether the allocated bits on
|
||||
// BIPRED_UPDATE frames need to be further adjusted.
|
||||
gf_group->bit_allocation[frame_index] = target_frame_size;
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
} else if (cpi->new_bwdref_update_rule &&
|
||||
gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE) {
|
||||
assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL &&
|
||||
@@ -2280,23 +2120,16 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
|
||||
gf_group->bit_allocation[arf_pos] = target_frame_size;
|
||||
// Note: Boost, if needed, is added in the next loop.
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
} else {
|
||||
assert(gf_group->update_type[frame_index] == LF_UPDATE ||
|
||||
gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
|
||||
gf_group->bit_allocation[frame_index] = target_frame_size;
|
||||
#if MULTI_LVL_BOOST_VBR_CQ
|
||||
if (cpi->new_bwdref_update_rule) {
|
||||
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
const int this_budget_reduction =
|
||||
(int)(target_frame_size * LEAF_REDUCTION_FACTOR);
|
||||
gf_group->bit_allocation[frame_index] -= this_budget_reduction;
|
||||
budget_reduced_from_leaf_level += this_budget_reduction;
|
||||
#else
|
||||
gf_group->bit_allocation[frame_index] -= (target_frame_size >> 1);
|
||||
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
}
|
||||
#endif // MULTI_LVL_BOOST_VBR_CQ
|
||||
}
|
||||
|
||||
++frame_index;
|
||||
@@ -2308,8 +2141,6 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
#if MULTI_LVL_BOOST_VBR_CQ
|
||||
if (budget_reduced_from_leaf_level > 0) {
|
||||
// Restore.
|
||||
frame_index = tmp_frame_index;
|
||||
@@ -2323,16 +2154,11 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
const int arf_pos = gf_group->arf_pos_in_gf[frame_index];
|
||||
const int this_lvl = gf_group->pyramid_level[arf_pos];
|
||||
const int dist2top = gf_group->pyramid_height - 1 - this_lvl;
|
||||
#if NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
const double lvl_boost_factor =
|
||||
lvl_budget_factor[gf_group->pyramid_height - 2][dist2top];
|
||||
const int extra_size =
|
||||
(int)(budget_reduced_from_leaf_level * lvl_boost_factor /
|
||||
gf_group->pyramid_lvl_nodes[this_lvl]);
|
||||
#else
|
||||
const int target_frame_size = gf_group->bit_allocation[arf_pos];
|
||||
const int extra_size = target_frame_size >> dist2top;
|
||||
#endif // NEW_MULTI_LVL_BOOST_VBR_ALLOC
|
||||
gf_group->bit_allocation[arf_pos] += extra_size;
|
||||
}
|
||||
++frame_index;
|
||||
@@ -2344,14 +2170,8 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // MULTI_LVL_BOOST_VBR_CQ
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
if (cpi->new_bwdref_update_rule == 0 && rc->source_alt_ref_pending) {
|
||||
#else
|
||||
if (rc->source_alt_ref_pending) {
|
||||
#endif
|
||||
if (cpi->num_extra_arfs) {
|
||||
// NOTE: For bit allocation, move the allocated bits associated with
|
||||
// INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
|
||||
@@ -2379,12 +2199,11 @@ static INLINE int is_almost_static(double gf_zero_motion, int kf_zero_motion) {
|
||||
(kf_zero_motion >= STATIC_KF_GROUP_THRESH);
|
||||
}
|
||||
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
#define ARF_ABS_ZOOM_THRESH 4.4
|
||||
#endif // CONFIG_FIX_GF_LENGTH
|
||||
|
||||
// Analyse and define a gf/arf group.
|
||||
static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
|
||||
const EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
AV1EncoderConfig *const oxcf = &cpi->oxcf;
|
||||
@@ -2394,10 +2213,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
int i;
|
||||
|
||||
double boost_score = 0.0;
|
||||
#if !CONFIG_FIX_GF_LENGTH
|
||||
double old_boost_score = 0.0;
|
||||
int active_max_gf_interval;
|
||||
#endif // !CONFIG_FIX_GF_LENGTH
|
||||
int active_min_gf_interval;
|
||||
double gf_group_err = 0.0;
|
||||
#if GROUP_ADAPTIVE_MAXQ
|
||||
@@ -2427,14 +2242,15 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
int64_t gf_group_bits;
|
||||
double gf_group_error_left;
|
||||
int gf_arf_bits;
|
||||
const int is_key_frame = frame_is_intra_only(cm);
|
||||
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
|
||||
const int is_intra_only = frame_params->frame_type == KEY_FRAME ||
|
||||
frame_params->frame_type == INTRA_ONLY_FRAME;
|
||||
const int arf_active_or_kf = is_intra_only || rc->source_alt_ref_active;
|
||||
|
||||
cpi->extra_arf_allowed = 1;
|
||||
|
||||
// Reset the GF group data structures unless this is a key
|
||||
// frame in which case it will already have been done.
|
||||
if (is_key_frame == 0) {
|
||||
if (!is_intra_only) {
|
||||
av1_zero(twopass->gf_group);
|
||||
}
|
||||
|
||||
@@ -2462,35 +2278,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
const double mv_ratio_accumulator_thresh =
|
||||
(cpi->initial_height + cpi->initial_width) / 4.0;
|
||||
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
// TODO(urvang): Try the 'else' like logic to vary min and max interval.
|
||||
// TODO(urvang): Try logic to vary min and max interval based on q.
|
||||
active_min_gf_interval = rc->min_gf_interval;
|
||||
#else
|
||||
// Set a maximum and minimum interval for the GF group.
|
||||
// If the image appears almost completely static we can extend beyond this.
|
||||
{
|
||||
int int_max_q = (int)(av1_convert_qindex_to_q(
|
||||
twopass->active_worst_quality, cpi->common.seq_params.bit_depth));
|
||||
int int_lbq = (int)(av1_convert_qindex_to_q(
|
||||
rc->last_boosted_qindex, cpi->common.seq_params.bit_depth));
|
||||
|
||||
active_min_gf_interval = rc->min_gf_interval + AOMMIN(2, int_max_q / 200);
|
||||
if (active_min_gf_interval > rc->max_gf_interval)
|
||||
active_min_gf_interval = rc->max_gf_interval;
|
||||
|
||||
// The value chosen depends on the active Q range. At low Q we have
|
||||
// bits to spare and are better with a smaller interval and smaller boost.
|
||||
// At high Q when there are few bits to spare we are better with a longer
|
||||
// interval to spread the cost of the GF.
|
||||
active_max_gf_interval = 12 + AOMMIN(4, (int_lbq / 6));
|
||||
|
||||
// We have: active_min_gf_interval <= rc->max_gf_interval
|
||||
if (active_max_gf_interval < active_min_gf_interval)
|
||||
active_max_gf_interval = active_min_gf_interval;
|
||||
else if (active_max_gf_interval > rc->max_gf_interval)
|
||||
active_max_gf_interval = rc->max_gf_interval;
|
||||
}
|
||||
#endif // CONFIG_FIX_GF_LENGTH
|
||||
|
||||
double avg_sr_coded_error = 0;
|
||||
double avg_raw_err_stdev = 0;
|
||||
@@ -2552,7 +2341,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
boost_score +=
|
||||
decay_accumulator *
|
||||
calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
// If almost totally static, we will not use the fixed GF length later,
|
||||
// so we can continue for more frames.
|
||||
if (i >= (av1_rc_get_fixed_gf_length(oxcf->gf_max_pyr_height) + 1) &&
|
||||
@@ -2570,39 +2358,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH)) {
|
||||
break;
|
||||
}
|
||||
#else
|
||||
// Break out conditions.
|
||||
// Break at maximum of active_max_gf_interval unless almost totally static.
|
||||
//
|
||||
// Note that the addition of a test of rc->source_alt_ref_active is
|
||||
// deliberate. The effect of this is that after a normal altref group even
|
||||
// if the material is static there will be one normal length GF group
|
||||
// before allowing longer GF groups. The reason for this is that in cases
|
||||
// such as slide shows where slides are separated by a complex transition
|
||||
// such as a fade, the arf group spanning the transition may not be coded
|
||||
// at a very high quality and hence this frame (with its overlay) is a
|
||||
// poor golden frame to use for an extended group.
|
||||
if ((i >= (active_max_gf_interval + arf_active_or_kf) &&
|
||||
((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
|
||||
(
|
||||
// Don't break out with a very short interval.
|
||||
(i >= active_min_gf_interval + arf_active_or_kf) &&
|
||||
(!flash_detected) &&
|
||||
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
|
||||
(abs_mv_in_out_accumulator > 3.0) ||
|
||||
(mv_in_out_accumulator < -2.0) ||
|
||||
((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
|
||||
// If GF group interval is < 12, we force it to be 8. Otherwise,
|
||||
// if it is >= 12, we keep it as is.
|
||||
// NOTE: 'i' is 1 more than the GF group interval candidate that is being
|
||||
// checked.
|
||||
if (i == (8 + 1) || i >= (12 + 1)) {
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
old_boost_score = boost_score;
|
||||
#endif // CONFIG_FIX_GF_LENGTH
|
||||
*this_frame = next_frame;
|
||||
}
|
||||
twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
|
||||
@@ -2638,7 +2393,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
#define REDUCE_GF_LENGTH_TO_KEY_THRESH 9
|
||||
#define REDUCE_GF_LENGTH_BY 1
|
||||
int alt_offset = 0;
|
||||
#if REDUCE_LAST_GF_LENGTH
|
||||
// The length reduction strategy is tweaked using AOM_Q mode, and doesn't work
|
||||
// for VBR mode.
|
||||
// Also, we don't have to do the adjustment for lossless mode.
|
||||
@@ -2670,7 +2424,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
i -= roll_back;
|
||||
}
|
||||
}
|
||||
#endif // REDUCE_LAST_GF_LENGTH
|
||||
|
||||
// Should we use the alternate reference frame.
|
||||
if (use_alt_ref) {
|
||||
@@ -2713,7 +2466,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
|
||||
}
|
||||
|
||||
#if REDUCE_LAST_ALT_BOOST
|
||||
#define LAST_ALR_BOOST_FACTOR 0.2f
|
||||
rc->arf_boost_factor = 1.0;
|
||||
if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf)) {
|
||||
@@ -2723,7 +2475,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!cpi->extra_arf_allowed) {
|
||||
cpi->num_extra_arfs = 0;
|
||||
@@ -2732,7 +2483,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
// Note: When new pyramid structure is used through
|
||||
// 'define_customized_gf_group_structure()' function, this value is
|
||||
// overridden.
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
if (rc->baseline_gf_interval == MIN_GF_INTERVAL &&
|
||||
rc->source_alt_ref_pending) {
|
||||
cpi->num_extra_arfs = 1;
|
||||
@@ -2741,18 +2491,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
rc->source_alt_ref_pending,
|
||||
oxcf->gf_max_pyr_height);
|
||||
}
|
||||
#else
|
||||
cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
|
||||
rc->source_alt_ref_pending,
|
||||
oxcf->gf_max_pyr_height);
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
}
|
||||
|
||||
#if !USE_SYMM_MULTI_LAYER
|
||||
// Currently at most two extra ARFs are allowed
|
||||
assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
|
||||
#endif
|
||||
|
||||
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
|
||||
|
||||
rc->bipred_group_interval = BFG_INTERVAL;
|
||||
@@ -2814,20 +2554,21 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
// also a key frame in which case it has already been accounted for.
|
||||
if (rc->source_alt_ref_pending) {
|
||||
gf_group_error_left = gf_group_err - mod_frame_err;
|
||||
} else if (is_key_frame == 0) {
|
||||
} else if (!is_intra_only) {
|
||||
gf_group_error_left = gf_group_err - gf_first_frame_err;
|
||||
} else {
|
||||
gf_group_error_left = gf_group_err;
|
||||
}
|
||||
|
||||
// Allocate bits to each of the frames in the GF group.
|
||||
allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
|
||||
allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits,
|
||||
frame_params);
|
||||
|
||||
// Reset the file position.
|
||||
reset_fpf_position(twopass, start_pos);
|
||||
|
||||
// Calculate a section intra ratio used in setting max loop filter.
|
||||
if (cpi->common.current_frame.frame_type != KEY_FRAME) {
|
||||
if (frame_params->frame_type != KEY_FRAME) {
|
||||
twopass->section_intra_rating = calculate_section_intra_ratio(
|
||||
start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
|
||||
}
|
||||
@@ -2966,7 +2707,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
|
||||
av1_zero(next_frame);
|
||||
|
||||
cpi->common.current_frame.frame_type = KEY_FRAME;
|
||||
rc->frames_since_key = 0;
|
||||
|
||||
// Reset the GF group data structures.
|
||||
@@ -3195,51 +2935,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
twopass->modified_error_left -= kf_group_err;
|
||||
}
|
||||
|
||||
// Sets the reference-buffer refresh flags on `cpi` for a frame whose update
// type is `update_type`.
//
// Defaults, which stay in effect for every type not listed below:
//   refresh_last_frame = 1, all other refresh_* flags = 0,
//   rc->is_bwd_ref_frame = 0.
// Per-type overrides:
//   ARF_UPDATE       - only refresh_alt_ref_frame is set; clears
//                      rc->is_src_frame_alt_ref.
//   INTNL_ARF_UPDATE - only refresh_alt2_ref_frame is set; clears
//                      rc->is_src_frame_alt_ref and rc->is_src_frame_ext_arf.
//   BIPRED_UPDATE    - only refresh_bwd_ref_frame is set; sets
//                      rc->is_bwd_ref_frame.
// Note: rc->is_src_frame_alt_ref and rc->is_src_frame_ext_arf are written
// only in the two ARF cases; any other update type leaves them unchanged.
void av1_configure_buffer_updates_firstpass(AV1_COMP *cpi,
|
||||
FRAME_UPDATE_TYPE update_type) {
|
||||
RATE_CONTROL *rc = &cpi->rc;
|
||||
|
||||
// Baseline: refresh LAST only. The switch below overrides this per type.
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
rc->is_bwd_ref_frame = 0;
|
||||
|
||||
switch (update_type) {
|
||||
case ARF_UPDATE:
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
|
||||
rc->is_src_frame_alt_ref = 0;
|
||||
break;
|
||||
case INTNL_ARF_UPDATE:
|
||||
cpi->refresh_alt2_ref_frame = 1;
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
rc->is_src_frame_alt_ref = 0;
|
||||
rc->is_src_frame_ext_arf = 0;
|
||||
|
||||
break;
|
||||
case BIPRED_UPDATE:
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
rc->is_bwd_ref_frame = 1;
|
||||
break;
|
||||
// Every other update type keeps the baseline set above.
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
static int is_skippable_frame(const AV1_COMP *cpi) {
|
||||
// If the current frame does not have non-zero motion vector detected in the
|
||||
// first pass, and so do its previous and forward frames, then this frame
|
||||
@@ -3259,7 +2954,8 @@ static int is_skippable_frame(const AV1_COMP *cpi) {
|
||||
twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
|
||||
}
|
||||
|
||||
void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
|
||||
void av1_rc_get_second_pass_params(AV1_COMP *cpi,
|
||||
EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
CurrentFrame *const current_frame = &cm->current_frame;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
@@ -3278,16 +2974,16 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
|
||||
// advance the input pointer as we already have what we need.
|
||||
if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
|
||||
gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
|
||||
av1_configure_buffer_updates(cpi);
|
||||
av1_configure_buffer_updates(cpi, gf_group->update_type[gf_group->index]);
|
||||
target_rate = gf_group->bit_allocation[gf_group->index];
|
||||
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
|
||||
rc->base_frame_target = target_rate;
|
||||
|
||||
if (cpi->no_show_kf) {
|
||||
assert(gf_group->update_type[gf_group->index] == ARF_UPDATE);
|
||||
current_frame->frame_type = KEY_FRAME;
|
||||
frame_params->frame_type = KEY_FRAME;
|
||||
} else {
|
||||
current_frame->frame_type = INTER_FRAME;
|
||||
frame_params->frame_type = INTER_FRAME;
|
||||
}
|
||||
|
||||
// Do the firstpass stats indicate that this frame is skippable for the
|
||||
@@ -3342,16 +3038,17 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
|
||||
if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
|
||||
FIRSTPASS_STATS this_frame_copy;
|
||||
this_frame_copy = this_frame;
|
||||
frame_params->frame_type = KEY_FRAME;
|
||||
// Define next KF group and assign bits to it.
|
||||
find_next_key_frame(cpi, &this_frame);
|
||||
this_frame = this_frame_copy;
|
||||
} else {
|
||||
current_frame->frame_type = INTER_FRAME;
|
||||
frame_params->frame_type = INTER_FRAME;
|
||||
}
|
||||
|
||||
// Define a new GF/ARF group. (Should always enter here for key frames).
|
||||
if (rc->frames_till_gf_update_due == 0) {
|
||||
define_gf_group(cpi, &this_frame);
|
||||
define_gf_group(cpi, &this_frame, frame_params);
|
||||
|
||||
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
|
||||
|
||||
@@ -3369,7 +3066,7 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
|
||||
#endif
|
||||
}
|
||||
|
||||
av1_configure_buffer_updates(cpi);
|
||||
av1_configure_buffer_updates(cpi, gf_group->update_type[gf_group->index]);
|
||||
|
||||
// Do the firstpass stats indicate that this frame is skippable for the
|
||||
// partition search?
|
||||
@@ -3379,7 +3076,7 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
|
||||
|
||||
target_rate = gf_group->bit_allocation[gf_group->index];
|
||||
|
||||
if (cpi->common.current_frame.frame_type == KEY_FRAME)
|
||||
if (frame_params->frame_type == KEY_FRAME)
|
||||
target_rate = av1_rc_clamp_iframe_target_size(cpi, target_rate);
|
||||
else
|
||||
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
|
||||
|
||||
+4
-6
@@ -114,12 +114,10 @@ typedef struct {
|
||||
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
unsigned char arf_pos_in_gf[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char pyramid_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char pyramid_height;
|
||||
unsigned char pyramid_lvl_nodes[MAX_PYRAMID_LVL];
|
||||
#endif // USE_SYMM_MULTI_LAYER
|
||||
unsigned char brf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char bidir_pred_enabled[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
@@ -173,16 +171,16 @@ typedef struct {
|
||||
} TWO_PASS;
|
||||
|
||||
struct AV1_COMP;
|
||||
struct EncodeFrameParams;
|
||||
|
||||
void av1_init_first_pass(struct AV1_COMP *cpi);
|
||||
void av1_rc_get_first_pass_params(struct AV1_COMP *cpi);
|
||||
void av1_first_pass(struct AV1_COMP *cpi, const struct lookahead_entry *source);
|
||||
void av1_first_pass(struct AV1_COMP *cpi, const int64_t ts_duration);
|
||||
void av1_end_first_pass(struct AV1_COMP *cpi);
|
||||
|
||||
void av1_init_second_pass(struct AV1_COMP *cpi);
|
||||
void av1_rc_get_second_pass_params(struct AV1_COMP *cpi);
|
||||
void av1_configure_buffer_updates_firstpass(struct AV1_COMP *cpi,
|
||||
FRAME_UPDATE_TYPE update_type);
|
||||
void av1_rc_get_second_pass_params(
|
||||
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
|
||||
|
||||
// Post encode update of the rate control parameters for 2-pass
|
||||
void av1_twopass_postencode_update(struct AV1_COMP *cpi);
|
||||
|
||||
+2
-2
@@ -71,8 +71,8 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
|
||||
xd->mi[0]->mv[0] = x->best_mv;
|
||||
xd->mi[0]->ref_frame[1] = NONE_FRAME;
|
||||
|
||||
av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
|
||||
BLOCK_16X16);
|
||||
av1_enc_build_inter_predictor(&cpi->common, xd, mb_row, mb_col, NULL,
|
||||
BLOCK_16X16, AOM_PLANE_Y, AOM_PLANE_Y);
|
||||
|
||||
/* restore UMV window */
|
||||
x->mv_limits = tmp_mv_limits;
|
||||
|
||||
Vendored
+7
-7
@@ -336,7 +336,7 @@ static unsigned int setup_center_error(
|
||||
int *mvcost[2], unsigned int *sse1, int *distortion) {
|
||||
unsigned int besterr;
|
||||
if (second_pred != NULL) {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
|
||||
uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
|
||||
if (mask) {
|
||||
@@ -641,7 +641,7 @@ static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
|
||||
int mask_stride, int invert_mask, int w, int h,
|
||||
unsigned int *sse, int subpel_search) {
|
||||
unsigned int besterr;
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
|
||||
uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
|
||||
if (second_pred != NULL) {
|
||||
@@ -899,7 +899,8 @@ unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
|
||||
unsigned int mse;
|
||||
unsigned int sse;
|
||||
|
||||
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
|
||||
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
|
||||
AOM_PLANE_Y, AOM_PLANE_Y);
|
||||
mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
|
||||
mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, x->mv_cost_stack,
|
||||
x->errorperbit);
|
||||
@@ -2213,9 +2214,8 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
: av1_get_ref_frame_hash_map(&cpi->common,
|
||||
x->e_mbd.mi[0]->ref_frame[0]);
|
||||
|
||||
av1_get_block_hash_value(
|
||||
what, what_stride, block_width, &hash_value1, &hash_value2,
|
||||
x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
|
||||
av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
|
||||
&hash_value2, is_cur_buf_hbd(&x->e_mbd), x);
|
||||
|
||||
const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
|
||||
// for intra, at least one match can be found: itself.
|
||||
@@ -2334,7 +2334,7 @@ static int upsampled_obmc_pref_error(
|
||||
unsigned int besterr;
|
||||
|
||||
DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
|
||||
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
|
||||
subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
|
||||
|
||||
@@ -3784,6 +3784,77 @@ static const NN_CONFIG simple_motion_search_prune_part_nn_config_8 = {
|
||||
#undef NUM_LAYER_0_UNITS_8
|
||||
#undef NUM_LOGITS_8
|
||||
|
||||
// Weight tables named for the "split" partition decision in two-pass
// encoding, one table per size suffix (128, 64, 32, 16, 8).
// Each table holds FEATURE_SIZE + 1 = 20 floats.
// NOTE(review): the code that consumes these tables is not visible here. The
// suffix presumably is the square block dimension, and the extra trailing
// entry looks like a bias/intercept term -- confirm against the consumer.
#define FEATURE_SIZE 19
|
||||
static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
|
||||
2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
|
||||
0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f,
|
||||
0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f,
|
||||
0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
|
||||
2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f,
|
||||
-1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f,
|
||||
-0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f,
|
||||
0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
|
||||
2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f,
|
||||
-0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f,
|
||||
-1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f,
|
||||
0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f,
|
||||
};
|
||||
|
||||
static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
|
||||
1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f,
|
||||
-0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f,
|
||||
-1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f,
|
||||
-0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f,
|
||||
};
|
||||
|
||||
// In this table the entries at indices 3, 7, 11 and 15 are exactly zero.
static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
|
||||
2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f,
|
||||
-0.658074f, -1.960362f, 0.000000f, -0.403080f, 0.282699f,
|
||||
-2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f,
|
||||
0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f,
|
||||
};
|
||||
|
||||
// Weight tables named for the "none" (no further partitioning) decision in
// two-pass encoding, one table per size suffix (128, 64, 32, 16, 8).
// Each table holds FEATURE_SIZE + 1 floats.
// NOTE(review): the code that consumes these tables is not visible here. The
// suffix presumably is the square block dimension, and the extra trailing
// entry looks like a bias/intercept term -- confirm against the consumer.
static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
|
||||
-1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f,
|
||||
-0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f,
|
||||
3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f,
|
||||
-0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
|
||||
-1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f,
|
||||
-4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f,
|
||||
1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f,
|
||||
-0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
|
||||
-2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f,
|
||||
-2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f,
|
||||
0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f,
|
||||
-0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f,
|
||||
};
|
||||
|
||||
static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
|
||||
-1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f,
|
||||
-3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f,
|
||||
0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f,
|
||||
-0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f,
|
||||
};
|
||||
|
||||
// In this table the entries at indices 3, 7, 11 and 15 are exactly zero.
static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
|
||||
-1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f,
|
||||
-1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f,
|
||||
0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f,
|
||||
0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
+18
-165
@@ -29,6 +29,7 @@
|
||||
#include "av1/common/seg_common.h"
|
||||
|
||||
#include "av1/encoder/encodemv.h"
|
||||
#include "av1/encoder/encode_strategy.h"
|
||||
#include "av1/encoder/random.h"
|
||||
#include "av1/encoder/ratectrl.h"
|
||||
|
||||
@@ -558,13 +559,11 @@ static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
|
||||
arfgf_low_motion_minq, arfgf_high_motion_minq);
|
||||
}
|
||||
|
||||
#if REDUCE_LAST_ALT_BOOST
|
||||
// Returns entry `q` of the table that `arfgf_high_motion_minq` refers to
// after ASSIGN_MINQ_TABLE has run for `bit_depth`.
//   q         - table index; it is used unchecked, so the caller must pass a
//               value that is in range for the table.
//   bit_depth - forwarded to ASSIGN_MINQ_TABLE.
// NOTE(review): ASSIGN_MINQ_TABLE is defined outside this view; presumably it
// points the local at a per-bit-depth "arfgf_high_motion_minq" table --
// confirm against the macro definition.
static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) {
|
||||
// Declared without an initializer; the macro on the next line is expected to
// assign it (TODO confirm it does so for every bit depth).
int *arfgf_high_motion_minq;
|
||||
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
|
||||
return arfgf_high_motion_minq[q];
|
||||
}
|
||||
#endif
|
||||
|
||||
static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) {
|
||||
const RATE_CONTROL *const rc = &cpi->rc;
|
||||
@@ -965,12 +964,8 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
const int bit_depth = cm->seq_params.bit_depth;
|
||||
ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
|
||||
|
||||
#if CUSTOMIZED_GF
|
||||
const int is_intrl_arf_boost =
|
||||
gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
|
||||
#else
|
||||
const int is_intrl_arf_boost = cpi->refresh_alt2_ref_frame;
|
||||
#endif // CUSTOMIZED_GF
|
||||
|
||||
if (frame_is_intra_only(cm)) {
|
||||
if (rc->frames_to_key == 1 && oxcf->rc_mode == AOM_Q) {
|
||||
@@ -1053,17 +1048,14 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
// Constrained quality use slightly lower active best.
|
||||
active_best_quality = active_best_quality * 15 / 16;
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
|
||||
if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
|
||||
(is_intrl_arf_boost && !cpi->new_bwdref_update_rule)) {
|
||||
#if REDUCE_LAST_ALT_BOOST
|
||||
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
|
||||
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
|
||||
const int boost = min_boost - active_best_quality;
|
||||
|
||||
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
|
||||
}
|
||||
#endif // REDUCE_LAST_ALT_BOOST
|
||||
*arf_q = active_best_quality;
|
||||
} else if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
|
||||
assert(rc->arf_q >= 0); // Ensure it is set to a valid value.
|
||||
@@ -1074,7 +1066,6 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
++this_height;
|
||||
}
|
||||
}
|
||||
#endif // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
|
||||
} else if (oxcf->rc_mode == AOM_Q) {
|
||||
if (!cpi->refresh_alt_ref_frame && !is_intrl_arf_boost) {
|
||||
active_best_quality = cq_level;
|
||||
@@ -1082,17 +1073,14 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
|
||||
active_best_quality = get_gf_active_quality(rc, q, bit_depth);
|
||||
*arf_q = active_best_quality;
|
||||
#if REDUCE_LAST_ALT_BOOST
|
||||
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
|
||||
const int boost = min_boost - active_best_quality;
|
||||
|
||||
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
|
||||
#endif
|
||||
} else {
|
||||
assert(rc->arf_q >= 0); // Ensure it is set to a valid value.
|
||||
active_best_quality = rc->arf_q;
|
||||
}
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
|
||||
int this_height = gf_group_pyramid_level(cpi);
|
||||
while (this_height < gf_group->pyramid_height) {
|
||||
@@ -1100,24 +1088,18 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
++this_height;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
// Modify best quality for second level arfs. For mode AOM_Q this
|
||||
// becomes the baseline frame q.
|
||||
if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
|
||||
active_best_quality = (active_best_quality + cq_level + 1) / 2;
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
active_best_quality = get_gf_active_quality(rc, q, bit_depth);
|
||||
#if REDUCE_LAST_ALT_BOOST
|
||||
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
|
||||
const int boost = min_boost - active_best_quality;
|
||||
|
||||
active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
|
||||
#endif
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
|
||||
int this_height = gf_group_pyramid_level(cpi);
|
||||
while (this_height < gf_group->pyramid_height) {
|
||||
@@ -1126,7 +1108,6 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
|
||||
++this_height;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
if (oxcf->rc_mode == AOM_Q) {
|
||||
@@ -1293,16 +1274,12 @@ static void update_alt_ref_frame_stats(AV1_COMP *cpi) {
|
||||
|
||||
static void update_golden_frame_stats(AV1_COMP *cpi) {
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
#if CUSTOMIZED_GF
|
||||
const TWO_PASS *const twopass = &cpi->twopass;
|
||||
const GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
const int is_intrnl_arf =
|
||||
cpi->oxcf.pass == 2
|
||||
? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
|
||||
: cpi->refresh_alt2_ref_frame;
|
||||
#else
|
||||
const int is_intnl_arf = cpi->refresh_alt2_ref_frame;
|
||||
#endif
|
||||
|
||||
// Update the Golden frame usage counts.
|
||||
// NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame,
|
||||
@@ -1328,127 +1305,6 @@ static void update_golden_frame_stats(AV1_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
|
||||
// Define the reference buffers that will be updated post encode.
//
// The update type is read from the current GF group entry,
// twopass->gf_group.update_type[twopass->gf_group.index]. The five
// cpi->rc.is_* source-type flags are cleared first, then each case sets the
// cpi->refresh_* flags (and, where noted, one or two rc flags):
//
//   type                  last golden bwd alt2 alt   rc flags set to 1
//   KF_UPDATE              1     1     1    1    1   -
//   LF_UPDATE              1     0     0    0    0   -
//   GF_UPDATE              1     1     0    0    0   -
//   OVERLAY_UPDATE         0     1     0    0    0   is_src_frame_alt_ref
//   ARF_UPDATE             0     0     0    0    1   -
//   BRF_UPDATE             0     0     1    0    0   is_bwd_ref_frame
//   LAST_BIPRED_UPDATE     1     0     0    0    0   is_last_bipred_frame
//   BIPRED_UPDATE          1     0     0    0    0   is_bipred_frame
//   INTNL_OVERLAY_UPDATE   1     0     0    0    0   is_src_frame_alt_ref,
//                                                    is_src_frame_ext_arf
//   INTNL_ARF_UPDATE       0     0     *    *    0   -
//   (*) bwd=1/alt2=0 when USE_SYMM_MULTI_LAYER is enabled and
//       cpi->new_bwdref_update_rule == 1; otherwise bwd=0/alt2=1.
//
// Any other update type hits assert(0). The refresh_* flags are not reset
// before the switch, so in builds where assert is compiled out they keep
// whatever values they had on entry.
|
||||
void av1_configure_buffer_updates(AV1_COMP *cpi) {
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
|
||||
// NOTE(weitinglin): Should we define another function to take care of
|
||||
// cpi->rc.is_$Source_Type to make this function as it is in the comment?
|
||||
|
||||
cpi->rc.is_src_frame_alt_ref = 0;
|
||||
cpi->rc.is_bwd_ref_frame = 0;
|
||||
cpi->rc.is_last_bipred_frame = 0;
|
||||
cpi->rc.is_bipred_frame = 0;
|
||||
cpi->rc.is_src_frame_ext_arf = 0;
|
||||
|
||||
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
|
||||
case KF_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 1;
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case LF_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
case GF_UPDATE:
|
||||
// TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
|
||||
// needed.
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
case OVERLAY_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_src_frame_alt_ref = 1;
|
||||
break;
|
||||
|
||||
case ARF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
// NOTE: BWDREF does not get updated along with ALTREF_FRAME.
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case BRF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_bwd_ref_frame = 1;
|
||||
break;
|
||||
|
||||
case LAST_BIPRED_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_last_bipred_frame = 1;
|
||||
break;
|
||||
|
||||
case BIPRED_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_bipred_frame = 1;
|
||||
break;
|
||||
|
||||
case INTNL_OVERLAY_UPDATE:
|
||||
cpi->refresh_last_frame = 1;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->rc.is_src_frame_alt_ref = 1;
|
||||
cpi->rc.is_src_frame_ext_arf = 1;
|
||||
break;
|
||||
|
||||
case INTNL_ARF_UPDATE:
|
||||
cpi->refresh_last_frame = 0;
|
||||
cpi->refresh_golden_frame = 0;
|
||||
// With the new bwdref update rule the internal ARF goes to the BWDREF
// buffer; otherwise (or when USE_SYMM_MULTI_LAYER is off) it goes to ALTREF2.
#if USE_SYMM_MULTI_LAYER
|
||||
if (cpi->new_bwdref_update_rule == 1) {
|
||||
cpi->refresh_bwd_ref_frame = 1;
|
||||
cpi->refresh_alt2_ref_frame = 0;
|
||||
} else {
|
||||
#endif
|
||||
cpi->refresh_bwd_ref_frame = 0;
|
||||
cpi->refresh_alt2_ref_frame = 1;
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
}
|
||||
#endif
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
break;
|
||||
|
||||
// Unknown update type: treated as a programming error.
default: assert(0); break;
|
||||
}
|
||||
}
|
||||
|
||||
void av1_estimate_qp_gop(AV1_COMP *cpi) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
int gop_length = cpi->rc.baseline_gf_interval;
|
||||
@@ -1463,30 +1319,28 @@ void av1_estimate_qp_gop(AV1_COMP *cpi) {
|
||||
|
||||
cpi->twopass.gf_group.index = idx;
|
||||
rc_set_frame_target(cpi, target_rate, cm->width, cm->height);
|
||||
av1_configure_buffer_updates(cpi);
|
||||
av1_configure_buffer_updates(
|
||||
cpi, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]);
|
||||
tpl_frame->base_qindex = rc_pick_q_and_bounds_two_pass(
|
||||
cpi, cm->width, cm->height, &bottom_index, &top_index, &arf_q);
|
||||
tpl_frame->base_qindex = AOMMAX(tpl_frame->base_qindex, 1);
|
||||
}
|
||||
// Reset the actual index and frame update
|
||||
cpi->twopass.gf_group.index = gf_index;
|
||||
av1_configure_buffer_updates(cpi);
|
||||
av1_configure_buffer_updates(
|
||||
cpi, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]);
|
||||
}
|
||||
|
||||
void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
|
||||
const AV1_COMMON *const cm = &cpi->common;
|
||||
const CurrentFrame *const current_frame = &cm->current_frame;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
#if CUSTOMIZED_GF
|
||||
const TWO_PASS *const twopass = &cpi->twopass;
|
||||
const GF_GROUP *const gf_group = &twopass->gf_group;
|
||||
const int is_intrnl_arf =
|
||||
cpi->oxcf.pass == 2
|
||||
? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
|
||||
: cpi->refresh_alt2_ref_frame;
|
||||
#else
|
||||
const int is_intrnl_arf = cpi->refresh_alt2_ref_frame;
|
||||
#endif
|
||||
|
||||
const int qindex = cm->base_qindex;
|
||||
|
||||
@@ -1618,7 +1472,8 @@ static int calc_iframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
|
||||
return av1_rc_clamp_iframe_target_size(cpi, target);
|
||||
}
|
||||
|
||||
void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
|
||||
void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi,
|
||||
EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
CurrentFrame *const current_frame = &cm->current_frame;
|
||||
@@ -1632,44 +1487,41 @@ void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
|
||||
(current_frame->frame_number == 0 ||
|
||||
(cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 ||
|
||||
(cpi->oxcf.auto_key && 0))) {
|
||||
current_frame->frame_type = KEY_FRAME;
|
||||
frame_params->frame_type = KEY_FRAME;
|
||||
rc->this_key_frame_forced =
|
||||
current_frame->frame_number != 0 && rc->frames_to_key == 0;
|
||||
rc->frames_to_key = cpi->oxcf.key_freq;
|
||||
rc->kf_boost = DEFAULT_KF_BOOST;
|
||||
rc->source_alt_ref_active = 0;
|
||||
} else {
|
||||
current_frame->frame_type = INTER_FRAME;
|
||||
frame_params->frame_type = INTER_FRAME;
|
||||
if (sframe_enabled) {
|
||||
if (altref_enabled) {
|
||||
if (sframe_mode == 1) {
|
||||
// sframe_mode == 1: insert sframe if it matches altref frame.
|
||||
|
||||
if (current_frame->frame_number % sframe_dist == 0 &&
|
||||
current_frame->frame_type != KEY_FRAME &&
|
||||
current_frame->frame_number != 0 && cpi->refresh_alt_ref_frame) {
|
||||
current_frame->frame_type = S_FRAME;
|
||||
frame_params->frame_type = S_FRAME;
|
||||
}
|
||||
} else {
|
||||
// sframe_mode != 1: if sframe will be inserted at the next available
|
||||
// altref frame
|
||||
|
||||
if (current_frame->frame_number % sframe_dist == 0 &&
|
||||
current_frame->frame_type != KEY_FRAME &&
|
||||
current_frame->frame_number != 0) {
|
||||
rc->sframe_due = 1;
|
||||
}
|
||||
|
||||
if (rc->sframe_due && cpi->refresh_alt_ref_frame) {
|
||||
current_frame->frame_type = S_FRAME;
|
||||
frame_params->frame_type = S_FRAME;
|
||||
rc->sframe_due = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (current_frame->frame_number % sframe_dist == 0 &&
|
||||
current_frame->frame_type != KEY_FRAME &&
|
||||
current_frame->frame_number != 0) {
|
||||
current_frame->frame_type = S_FRAME;
|
||||
frame_params->frame_type = S_FRAME;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1692,7 +1544,7 @@ void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
|
||||
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
||||
av1_cyclic_refresh_update_parameters(cpi);
|
||||
|
||||
if (current_frame->frame_type == KEY_FRAME)
|
||||
if (frame_params->frame_type == KEY_FRAME)
|
||||
target = calc_iframe_target_size_one_pass_vbr(cpi);
|
||||
else
|
||||
target = calc_pframe_target_size_one_pass_vbr(cpi);
|
||||
@@ -1758,7 +1610,8 @@ static int calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
|
||||
return av1_rc_clamp_iframe_target_size(cpi, target);
|
||||
}
|
||||
|
||||
void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
|
||||
void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi,
|
||||
EncodeFrameParams *const frame_params) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
CurrentFrame *const current_frame = &cm->current_frame;
|
||||
@@ -1767,14 +1620,14 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
|
||||
if ((current_frame->frame_number == 0 ||
|
||||
(cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 ||
|
||||
(cpi->oxcf.auto_key && 0))) {
|
||||
current_frame->frame_type = KEY_FRAME;
|
||||
frame_params->frame_type = KEY_FRAME;
|
||||
rc->this_key_frame_forced =
|
||||
current_frame->frame_number != 0 && rc->frames_to_key == 0;
|
||||
rc->frames_to_key = cpi->oxcf.key_freq;
|
||||
rc->kf_boost = DEFAULT_KF_BOOST;
|
||||
rc->source_alt_ref_active = 0;
|
||||
} else {
|
||||
current_frame->frame_type = INTER_FRAME;
|
||||
frame_params->frame_type = INTER_FRAME;
|
||||
}
|
||||
if (rc->frames_till_gf_update_due == 0) {
|
||||
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
||||
@@ -1795,7 +1648,7 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
|
||||
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
||||
av1_cyclic_refresh_update_parameters(cpi);
|
||||
|
||||
if (current_frame->frame_type == KEY_FRAME)
|
||||
if (frame_params->frame_type == KEY_FRAME)
|
||||
target = calc_iframe_target_size_one_pass_cbr(cpi);
|
||||
else
|
||||
target = calc_pframe_target_size_one_pass_cbr(cpi);
|
||||
|
||||
+5
-23
@@ -34,27 +34,10 @@ extern "C" {
|
||||
// The maximum duration of a GF group that is static (e.g. a slide show).
|
||||
#define MAX_STATIC_GF_GROUP_LENGTH 250
|
||||
|
||||
#define CUSTOMIZED_GF 1
|
||||
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
// Minimum and maximum height for the new pyramid structure.
|
||||
// (Old structure supports height = 1, but does NOT support height = 4).
|
||||
#define MIN_PYRAMID_LVL 2
|
||||
#define MAX_PYRAMID_LVL 4
|
||||
#define USE_SYMM_MULTI_LAYER 1
|
||||
#define REDUCE_LAST_ALT_BOOST 1
|
||||
#define REDUCE_LAST_GF_LENGTH 1
|
||||
#define MULTI_LVL_BOOST_VBR_CQ 1
|
||||
#else
|
||||
#define USE_SYMM_MULTI_LAYER 0
|
||||
#define REDUCE_LAST_ALT_BOOST 0
|
||||
#define REDUCE_LAST_GF_LENGTH 0
|
||||
#define MULTI_LVL_BOOST_VBR_CQ 0
|
||||
#endif
|
||||
|
||||
#if USE_SYMM_MULTI_LAYER
|
||||
#define USE_MANUAL_GF4_STRUCT 0
|
||||
#endif
|
||||
|
||||
#define MIN_GF_INTERVAL 4
|
||||
#define MAX_GF_INTERVAL 16
|
||||
@@ -191,9 +174,7 @@ int av1_rc_get_default_min_gf_interval(int width, int height, double framerate);
|
||||
int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate,
|
||||
int max_pyr_height);
|
||||
|
||||
#if CONFIG_FIX_GF_LENGTH
|
||||
int av1_rc_get_fixed_gf_length(int max_pyr_height);
|
||||
#endif // CONFIG_FIX_GF_LENGTH
|
||||
|
||||
// Generally at the high level, the following flow is expected
|
||||
// to be enforced for rate control:
|
||||
@@ -218,8 +199,11 @@ int av1_rc_get_fixed_gf_length(int max_pyr_height);
|
||||
|
||||
// Functions to set parameters for encoding before the actual
|
||||
// encode_frame_to_data_rate() function.
|
||||
void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
|
||||
void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
|
||||
struct EncodeFrameParams;
|
||||
void av1_rc_get_one_pass_vbr_params(
|
||||
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
|
||||
void av1_rc_get_one_pass_cbr_params(
|
||||
struct AV1_COMP *cpi, struct EncodeFrameParams *const frame_params);
|
||||
|
||||
// Post encode update of the rate control parameters based
|
||||
// on bytes used
|
||||
@@ -283,8 +267,6 @@ void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height);
|
||||
|
||||
int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
|
||||
|
||||
void av1_configure_buffer_updates(struct AV1_COMP *cpi);
|
||||
|
||||
void av1_estimate_qp_gop(struct AV1_COMP *cpi);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Vendored
+135
-94
@@ -508,6 +508,17 @@ void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
|
||||
av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
|
||||
fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
|
||||
|
||||
for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
|
||||
pcost->base_cost[ctx][4] = 0;
|
||||
pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
|
||||
av1_cost_literal(1) -
|
||||
pcost->base_cost[ctx][0];
|
||||
pcost->base_cost[ctx][6] =
|
||||
pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
|
||||
pcost->base_cost[ctx][7] =
|
||||
pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
|
||||
}
|
||||
|
||||
for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
|
||||
av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
|
||||
fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
|
||||
@@ -538,6 +549,14 @@ void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
|
||||
// printf("%5d ", pcost->lps_cost[ctx][i]);
|
||||
// printf("\n");
|
||||
}
|
||||
for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
|
||||
pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
|
||||
pcost->lps_cost[ctx][0];
|
||||
for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
|
||||
pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
|
||||
pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -698,6 +717,10 @@ static const uint8_t bsize_model_cat_lookup[BLOCK_SIZES_ALL] = {
|
||||
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2
|
||||
};
|
||||
|
||||
static int sse_norm_model_cat_lookup(double sse_norm) {
|
||||
return (sse_norm > 16.0);
|
||||
}
|
||||
|
||||
static const double interp_rgrid_surf[4][33 * 18] = {
|
||||
{
|
||||
29.726102, 30.738006, 25.294088, 25.736759, 41.255961,
|
||||
@@ -1273,8 +1296,9 @@ static const double interp_dgrid_surf[33 * 18] = {
|
||||
0.007205, 0.007205, 0.007203, 0.004341, 0.004340, 0.004338,
|
||||
};
|
||||
|
||||
void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
|
||||
double *rate_f, double *dist_f) {
|
||||
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
|
||||
double yl, double *rate_f, double *dist_f) {
|
||||
(void)sse_norm;
|
||||
const double x_start = -0.5;
|
||||
const double x_end = 16.5;
|
||||
const double x_step = 1.0;
|
||||
@@ -1283,7 +1307,7 @@ void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
|
||||
const double y_step = 1.0;
|
||||
const double epsilon = 1e-6;
|
||||
const int stride = (int)rint((x_end - x_start) / x_step) + 1;
|
||||
const int cat = bsize_model_cat_lookup[bsize];
|
||||
const int rcat = bsize_model_cat_lookup[bsize];
|
||||
(void)y_end;
|
||||
|
||||
xm = AOMMAX(xm, x_start + x_step + epsilon);
|
||||
@@ -1301,7 +1325,7 @@ void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
|
||||
|
||||
const double yo = y - yi;
|
||||
const double xo = x - xi;
|
||||
const double *prate = &interp_rgrid_surf[cat][(yi - 1) * stride + (xi - 1)];
|
||||
const double *prate = &interp_rgrid_surf[rcat][(yi - 1) * stride + (xi - 1)];
|
||||
const double *pdist = &interp_dgrid_surf[(yi - 1) * stride + (xi - 1)];
|
||||
*rate_f = interp_bicubic(prate, stride, xo, yo);
|
||||
*dist_f = interp_bicubic(pdist, stride, xo, yo);
|
||||
@@ -1311,85 +1335,102 @@ static const double interp_rgrid_curv[4][65] = {
|
||||
{
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 23.801499, 28.387688, 33.388795, 42.298282,
|
||||
41.525408, 51.597692, 49.566271, 54.632979, 60.321507,
|
||||
67.730678, 75.766165, 85.324032, 96.600012, 120.839562,
|
||||
173.917577, 255.974908, 354.107573, 458.063476, 562.345966,
|
||||
668.568424, 772.072881, 878.598490, 982.202274, 1082.708946,
|
||||
1188.037853, 1287.702240, 1395.588773, 1490.825830, 1584.231230,
|
||||
1691.386090, 1766.822555, 1869.630904, 1926.743565, 2002.949495,
|
||||
2047.431137, 2138.486068, 2154.743767, 2209.242472, 2278.252010,
|
||||
2298.028834, 2302.326180, 2293.979995, 2275.826226, 2250.700821,
|
||||
2221.439725, 2190.878887, 2161.854252, 2137.201768, 2119.757381,
|
||||
2112.357039, 2117.836689, 2139.032277, 2178.779750, 2239.915056,
|
||||
},
|
||||
{
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
11.561347, 12.578139, 14.205101, 16.770584, 19.094853,
|
||||
21.330863, 23.298907, 26.901921, 34.501017, 57.891733,
|
||||
112.234763, 194.853189, 288.302032, 380.499422, 472.625309,
|
||||
560.226809, 647.928463, 734.155122, 817.489721, 906.265783,
|
||||
999.260562, 1094.489206, 1197.062998, 1293.296825, 1378.926484,
|
||||
1472.760990, 1552.663779, 1635.196884, 1692.451951, 1759.741063,
|
||||
1822.162720, 1916.515921, 1966.686071, 2031.647506, 2031.381029,
|
||||
2067.971335, 2203.662704, 2500.257936, 3019.559830, 3823.371186,
|
||||
4973.494802, 6531.733478, 8559.890013, 11119.767206, 14273.167855,
|
||||
18081.894761, 22607.750723, 27912.538538, 34058.061008, 41106.120930,
|
||||
2047.431137, 2138.486068, 2154.743767, 2209.242472, 2277.593051,
|
||||
2290.996432, 2307.452938, 2343.567091, 2397.654644, 2469.425868,
|
||||
2558.591037, 2664.860422, 2787.944296, 2927.552932, 3083.396602,
|
||||
3255.185579, 3442.630134, 3645.440541, 3863.327072, 4096.000000,
|
||||
},
|
||||
{
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 8.998436, 9.439592, 9.731837, 10.865931,
|
||||
11.561347, 12.578139, 14.205101, 16.770584, 19.094853,
|
||||
21.330863, 23.298907, 26.901921, 34.501017, 57.891733,
|
||||
112.234763, 194.853189, 288.302032, 380.499422, 472.625309,
|
||||
560.226809, 647.928463, 734.155122, 817.489721, 906.265783,
|
||||
999.260562, 1094.489206, 1197.062998, 1293.296825, 1378.926484,
|
||||
1472.760990, 1552.663779, 1635.196884, 1692.451951, 1759.741063,
|
||||
1822.162720, 1916.515921, 1966.686071, 2031.647506, 2033.700134,
|
||||
2087.847688, 2161.688858, 2242.536028, 2334.023491, 2436.337802,
|
||||
2549.665519, 2674.193198, 2810.107395, 2957.594666, 3116.841567,
|
||||
3288.034655, 3471.360486, 3667.005616, 3875.156602, 4096.000000,
|
||||
},
|
||||
{
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 2.377584, 2.557185, 2.732445, 2.851114,
|
||||
3.281800, 3.765589, 4.342578, 5.145582, 5.611038,
|
||||
6.642238, 7.945977, 11.800522, 17.346624, 37.501413,
|
||||
87.216800, 165.860942, 253.865564, 332.039345, 408.518863,
|
||||
478.120452, 547.268590, 616.067676, 680.022540, 753.863541,
|
||||
834.529973, 919.489191, 1008.264989, 1092.230318, 1173.971886,
|
||||
1249.514122, 1330.510941, 1399.523249, 1466.923387, 1530.533471,
|
||||
1586.515722, 1695.197774, 1746.648696, 1837.136959, 1909.056910,
|
||||
1974.948082, 2063.374132, 2178.496387, 2324.476176, 2505.474827,
|
||||
2725.653666, 2989.174023, 3300.197225, 3662.884600, 4081.397476,
|
||||
4559.897180, 5102.545042, 5713.502387, 6396.930546, 7156.990844,
|
||||
1586.515722, 1695.197774, 1746.648696, 1837.136959, 1909.075485,
|
||||
1975.074651, 2060.159200, 2155.335095, 2259.762505, 2373.710437,
|
||||
2497.447898, 2631.243895, 2775.367434, 2930.087523, 3095.673170,
|
||||
3272.393380, 3460.517161, 3660.313520, 3872.051464, 4096.000000,
|
||||
},
|
||||
{
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.614483, 0.842937, 1.050824, 1.326663, 1.717750,
|
||||
2.530591, 3.582302, 6.995373, 9.973335, 24.042464,
|
||||
56.598240, 113.680735, 180.018689, 231.050567, 266.101082,
|
||||
294.957934, 323.326511, 349.434429, 380.443211, 408.171987,
|
||||
441.214916, 475.716772, 512.900000, 551.186939, 592.364455,
|
||||
624.527378, 661.940693, 679.185473, 724.800679, 764.781792,
|
||||
873.050019, 950.299001, 939.292954, 1052.406153, 1030.816617,
|
||||
1086.316710, 1275.467594, 1671.923018, 2349.336727, 3381.362469,
|
||||
4841.653990, 6803.865037, 9341.649358, 12528.660698, 16438.552805,
|
||||
21144.979426, 26721.594308, 33242.051197, 40780.003840, 49409.105984,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.296997, 0.342545, 0.403097, 0.472889,
|
||||
0.614483, 0.842937, 1.050824, 1.326663, 1.717750,
|
||||
2.530591, 3.582302, 6.995373, 9.973335, 24.042464,
|
||||
56.598240, 113.680735, 180.018689, 231.050567, 266.101082,
|
||||
294.957934, 323.326511, 349.434429, 380.443211, 408.171987,
|
||||
441.214916, 475.716772, 512.900000, 551.186939, 592.364455,
|
||||
624.527378, 661.940693, 679.185473, 724.800679, 764.781792,
|
||||
873.050019, 950.299001, 939.292954, 1052.406153, 1033.893184,
|
||||
1112.182406, 1219.174326, 1337.296681, 1471.648357, 1622.492809,
|
||||
1790.093491, 1974.713858, 2176.617364, 2396.067465, 2633.327614,
|
||||
2888.661266, 3162.331876, 3454.602899, 3765.737789, 4096.000000,
|
||||
},
|
||||
};
|
||||
|
||||
static const double interp_dgrid_curv[65] = {
|
||||
14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855,
|
||||
14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.555776, 14.533692,
|
||||
14.439920, 14.257791, 13.977230, 13.623229, 13.064884, 12.355411, 11.560773,
|
||||
10.728960, 9.861975, 8.643612, 6.916021, 5.154769, 3.734940, 2.680051,
|
||||
1.925506, 1.408410, 1.042223, 0.767641, 0.565392, 0.420116, 0.310427,
|
||||
0.231711, 0.172999, 0.128293, 0.094992, 0.072171, 0.052972, 0.039354,
|
||||
0.029555, 0.022857, 0.016832, 0.013297, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
|
||||
0.000000, 0.000000,
|
||||
static const double interp_dgrid_curv[2][65] = {
|
||||
{
|
||||
16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
|
||||
15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
|
||||
15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
|
||||
13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
|
||||
7.487633, 5.688649, 4.267515, 3.196300, 2.434201, 1.834064,
|
||||
1.369920, 1.035921, 0.775279, 0.574895, 0.427232, 0.314123,
|
||||
0.233236, 0.171440, 0.128188, 0.092762, 0.067569, 0.049324,
|
||||
0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
|
||||
0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
|
||||
0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
|
||||
0.000348, 0.000193, 0.000085, 0.000021, 0.000000,
|
||||
},
|
||||
{
|
||||
16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
|
||||
15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
|
||||
15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
|
||||
13.073692, 12.222005, 11.237799, 9.985848, 8.898823, 7.423519,
|
||||
5.995325, 4.773152, 3.744032, 2.938217, 2.294526, 1.762412,
|
||||
1.327145, 1.020728, 0.765535, 0.570548, 0.425833, 0.313825,
|
||||
0.232959, 0.171324, 0.128174, 0.092750, 0.067558, 0.049319,
|
||||
0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
|
||||
0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
|
||||
0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
|
||||
0.000348, 0.000193, 0.000085, 0.000021, -0.000000,
|
||||
},
|
||||
};
|
||||
|
||||
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
|
||||
double *distbysse_f) {
|
||||
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
|
||||
double *rate_f, double *distbysse_f) {
|
||||
const double x_start = -15.5;
|
||||
const double x_end = 16.5;
|
||||
const double x_step = 0.5;
|
||||
const double epsilon = 1e-6;
|
||||
const int cat = bsize_model_cat_lookup[bsize];
|
||||
const int rcat = bsize_model_cat_lookup[bsize];
|
||||
const int dcat = sse_norm_model_cat_lookup(sse_norm);
|
||||
(void)x_end;
|
||||
|
||||
xqr = AOMMAX(xqr, x_start + x_step + epsilon);
|
||||
@@ -1400,9 +1441,9 @@ void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
|
||||
|
||||
assert(xi > 0);
|
||||
|
||||
const double *prate = &interp_rgrid_curv[cat][(xi - 1)];
|
||||
const double *pdist = &interp_dgrid_curv[(xi - 1)];
|
||||
const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
|
||||
*rate_f = interp_cubic(prate, xo);
|
||||
const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
|
||||
*distbysse_f = interp_cubic(pdist, xo);
|
||||
}
|
||||
|
||||
@@ -1565,7 +1606,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
} else {
|
||||
rd->thresh_mult[THR_NEARESTMV] = 0;
|
||||
rd->thresh_mult[THR_NEARESTL2] = 0;
|
||||
rd->thresh_mult[THR_NEARESTL3] = 0;
|
||||
rd->thresh_mult[THR_NEARESTL3] = 100;
|
||||
rd->thresh_mult[THR_NEARESTB] = 0;
|
||||
rd->thresh_mult[THR_NEARESTA2] = 0;
|
||||
rd->thresh_mult[THR_NEARESTA] = 0;
|
||||
@@ -1576,7 +1617,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_NEWL2] += 1000;
|
||||
rd->thresh_mult[THR_NEWL3] += 1000;
|
||||
rd->thresh_mult[THR_NEWB] += 1000;
|
||||
rd->thresh_mult[THR_NEWA2] = 1000;
|
||||
rd->thresh_mult[THR_NEWA2] = 1100;
|
||||
rd->thresh_mult[THR_NEWA] += 1000;
|
||||
rd->thresh_mult[THR_NEWG] += 1000;
|
||||
|
||||
@@ -1588,18 +1629,18 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_NEARA] += 1000;
|
||||
rd->thresh_mult[THR_NEARG] += 1000;
|
||||
|
||||
rd->thresh_mult[THR_GLOBALMV] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALMV] += 2200;
|
||||
rd->thresh_mult[THR_GLOBALL2] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALL3] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALB] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALB] += 2400;
|
||||
rd->thresh_mult[THR_GLOBALA2] = 2000;
|
||||
rd->thresh_mult[THR_GLOBALG] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALA] += 2000;
|
||||
rd->thresh_mult[THR_GLOBALA] += 2400;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1100;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 800;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 900;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
|
||||
@@ -1617,17 +1658,17 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1530;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1870;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2400;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2750;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1870;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 1800;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] += 2500;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
|
||||
@@ -1636,23 +1677,23 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 2500;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 3000;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1320;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 2040;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2500;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2250;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1360;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2400;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2250;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
|
||||
@@ -1665,7 +1706,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1870;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] += 2500;
|
||||
@@ -1679,7 +1720,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] += 2500;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1800;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700;
|
||||
@@ -1694,7 +1735,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] += 2500;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1440;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700;
|
||||
@@ -1708,29 +1749,29 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2500;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2750;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1600;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2640;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2400;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] += 3200;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1600;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 1800;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2400;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] += 3200;
|
||||
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1600;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1760;
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2400;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 1760;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2640;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2400;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] += 3200;
|
||||
|
||||
@@ -1738,21 +1779,21 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
|
||||
rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 2000;
|
||||
rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARBA] += 2200;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2400;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEARBA] += 1980;
|
||||
rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2640;
|
||||
rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] += 3200;
|
||||
|
||||
rd->thresh_mult[THR_DC] += 1000;
|
||||
rd->thresh_mult[THR_PAETH] += 1000;
|
||||
rd->thresh_mult[THR_SMOOTH] += 2000;
|
||||
rd->thresh_mult[THR_SMOOTH] += 2200;
|
||||
rd->thresh_mult[THR_SMOOTH_V] += 2000;
|
||||
rd->thresh_mult[THR_SMOOTH_H] += 2000;
|
||||
rd->thresh_mult[THR_H_PRED] += 2000;
|
||||
rd->thresh_mult[THR_V_PRED] += 2000;
|
||||
rd->thresh_mult[THR_V_PRED] += 1800;
|
||||
rd->thresh_mult[THR_D135_PRED] += 2500;
|
||||
rd->thresh_mult[THR_D203_PRED] += 2500;
|
||||
rd->thresh_mult[THR_D203_PRED] += 2000;
|
||||
rd->thresh_mult[THR_D157_PRED] += 2500;
|
||||
rd->thresh_mult[THR_D67_PRED] += 2500;
|
||||
rd->thresh_mult[THR_D67_PRED] += 2000;
|
||||
rd->thresh_mult[THR_D113_PRED] += 2500;
|
||||
rd->thresh_mult[THR_D45_PRED] += 2500;
|
||||
}
|
||||
|
||||
Vendored
+3
-3
@@ -656,10 +656,10 @@ void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
|
||||
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
|
||||
unsigned int qstep, int *rate, int64_t *dist);
|
||||
|
||||
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double xqr, double *rate_f,
|
||||
double *distbysse_f);
|
||||
void av1_model_rd_surffit(BLOCK_SIZE bsize, double xm, double yl,
|
||||
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
|
||||
double *rate_f, double *distbysse_f);
|
||||
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
|
||||
double yl, double *rate_f, double *distbysse_f);
|
||||
|
||||
int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
|
||||
const MACROBLOCKD *xd);
|
||||
|
||||
Vendored
+391
-300
File diff suppressed because it is too large
Load Diff
Vendored
-2
@@ -151,10 +151,8 @@ typedef struct {
|
||||
|
||||
sobel_xy sobel(const uint8_t *input, int stride, int i, int j, bool high_bd);
|
||||
|
||||
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
|
||||
void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
|
||||
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+30
-52
@@ -237,46 +237,19 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd, BLOCK_SIZE bsize,
|
||||
int mi_row, int mi_col,
|
||||
int plane_from, int plane_to) {
|
||||
int plane;
|
||||
static void build_inter_predictors_for_plane(const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd, int mi_row,
|
||||
int mi_col, const BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize, int plane_idx) {
|
||||
const struct macroblockd_plane *pd = &xd->plane[plane_idx];
|
||||
if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
|
||||
pd->subsampling_y))
|
||||
return;
|
||||
|
||||
const int mi_x = mi_col * MI_SIZE;
|
||||
const int mi_y = mi_row * MI_SIZE;
|
||||
for (plane = plane_from; plane <= plane_to; ++plane) {
|
||||
const struct macroblockd_plane *pd = &xd->plane[plane];
|
||||
const int bw = pd->width;
|
||||
const int bh = pd->height;
|
||||
|
||||
if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
|
||||
pd->subsampling_y))
|
||||
continue;
|
||||
|
||||
build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y);
|
||||
}
|
||||
}
|
||||
|
||||
void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, 0);
|
||||
}
|
||||
|
||||
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
for (int plane_idx = 1; plane_idx < MAX_MB_PLANE; plane_idx++) {
|
||||
av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize,
|
||||
plane_idx);
|
||||
}
|
||||
}
|
||||
|
||||
void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize, int plane_idx) {
|
||||
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, plane_idx,
|
||||
plane_idx);
|
||||
build_inter_predictors(cm, xd, plane_idx, xd->mi[0], 0, pd->width, pd->height,
|
||||
mi_x, mi_y);
|
||||
|
||||
if (is_interintra_pred(xd->mi[0])) {
|
||||
BUFFER_SET default_ctx = { { NULL, NULL, NULL }, { 0, 0, 0 } };
|
||||
@@ -291,13 +264,14 @@ void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
}
|
||||
}
|
||||
|
||||
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize) {
|
||||
const int num_planes = av1_num_planes(cm);
|
||||
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
|
||||
if (num_planes > 1)
|
||||
av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
|
||||
void av1_enc_build_inter_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col,
|
||||
const BUFFER_SET *ctx, BLOCK_SIZE bsize,
|
||||
int plane_from, int plane_to) {
|
||||
for (int plane_idx = plane_from; plane_idx <= plane_to; ++plane_idx) {
|
||||
build_inter_predictors_for_plane(cm, xd, mi_row, mi_col, ctx, bsize,
|
||||
plane_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(sarahparker):
|
||||
@@ -453,7 +427,7 @@ void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
|
||||
int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
int len = sizeof(uint16_t);
|
||||
dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
|
||||
dst_buf1[1] =
|
||||
@@ -576,37 +550,41 @@ static void build_wedge_inter_predictor_from_buf(
|
||||
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
|
||||
mbmi->interinter_comp.seg_mask = xd->seg_mask;
|
||||
const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
|
||||
const int is_hbd = is_cur_buf_hbd(xd);
|
||||
|
||||
if (is_compound && is_masked_compound_type(comp_data->type)) {
|
||||
if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
if (is_hbd) {
|
||||
av1_build_compound_diffwtd_mask_highbd(
|
||||
comp_data->seg_mask, comp_data->mask_type,
|
||||
CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
|
||||
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
|
||||
else
|
||||
} else {
|
||||
av1_build_compound_diffwtd_mask(
|
||||
comp_data->seg_mask, comp_data->mask_type, ext_dst0,
|
||||
ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w);
|
||||
}
|
||||
}
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
if (is_hbd) {
|
||||
build_masked_compound_highbd(
|
||||
dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
|
||||
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data,
|
||||
mbmi->sb_type, h, w, xd->bd);
|
||||
else
|
||||
} else {
|
||||
build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
|
||||
ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type,
|
||||
h, w);
|
||||
}
|
||||
} else {
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
if (is_hbd) {
|
||||
aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
|
||||
dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
|
||||
xd->bd);
|
||||
else
|
||||
} else {
|
||||
aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
|
||||
0, NULL, 0, w, h);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+4
-15
@@ -23,21 +23,10 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize, int plane_idx);
|
||||
|
||||
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col, BUFFER_SET *ctx,
|
||||
BLOCK_SIZE bsize);
|
||||
void av1_enc_build_inter_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col,
|
||||
const BUFFER_SET *ctx, BLOCK_SIZE bsize,
|
||||
int plane_from, int plane_to);
|
||||
|
||||
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, const MV *src_mv,
|
||||
|
||||
+34
-34
@@ -80,7 +80,7 @@ static int frame_is_boosted(const AV1_COMP *cpi) {
|
||||
// partly on the screen area over which they propagate. Propagation is
|
||||
// limited by transform block size but the screen area taken up by a given block
|
||||
// size will be larger for a small image format stretched to full screen.
|
||||
static BLOCK_SIZE set_partition_min_limit(AV1_COMMON *const cm) {
|
||||
static BLOCK_SIZE set_partition_min_limit(const AV1_COMMON *const cm) {
|
||||
unsigned int screen_area = (cm->width * cm->height);
|
||||
|
||||
// Select block size based on image format size.
|
||||
@@ -103,10 +103,9 @@ static int has_internal_image_edge(const AV1_COMP *cpi) {
|
||||
(cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
|
||||
}
|
||||
|
||||
static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
|
||||
SPEED_FEATURES *sf,
|
||||
int speed) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
static void set_good_speed_feature_framesize_dependent(
|
||||
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
|
||||
const AV1_COMMON *const cm = &cpi->common;
|
||||
const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
|
||||
const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
|
||||
|
||||
@@ -201,11 +200,12 @@ static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
|
||||
}
|
||||
}
|
||||
|
||||
static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
SPEED_FEATURES *sf,
|
||||
int speed) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
static void set_good_speed_features_framesize_independent(
|
||||
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
|
||||
const AV1_COMMON *const cm = &cpi->common;
|
||||
const int boosted = frame_is_boosted(cpi);
|
||||
const int is_boosted_arf2_bwd_type =
|
||||
boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame;
|
||||
|
||||
// Speed 0 for all speed features that give neutral coding performance change.
|
||||
sf->reduce_inter_modes = 1;
|
||||
@@ -213,6 +213,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
sf->ml_prune_rect_partition = 1;
|
||||
sf->ml_prune_ab_partition = 1;
|
||||
sf->ml_prune_4_partition = 1;
|
||||
sf->simple_motion_search_prune_rect = 1;
|
||||
sf->adaptive_txb_search_level = 1;
|
||||
sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
|
||||
sf->model_based_prune_tx_search_level = 1;
|
||||
@@ -222,8 +223,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
// TODO(debargha): Test, tweak and turn on either 1 or 2
|
||||
sf->inter_mode_rd_model_estimation = 1;
|
||||
|
||||
sf->prune_ref_frame_for_rect_partitions =
|
||||
!(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
|
||||
sf->prune_ref_frame_for_rect_partitions = !is_boosted_arf2_bwd_type;
|
||||
sf->prune_ref_mode_for_partitions = sf->prune_ref_frame_for_rect_partitions;
|
||||
sf->less_rectangular_check_level = 1;
|
||||
sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
|
||||
@@ -270,7 +270,6 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
sf->prune_single_motion_modes_by_simple_trans = 1;
|
||||
|
||||
sf->simple_motion_search_split_only = 1;
|
||||
sf->simple_motion_search_prune_rect = 1;
|
||||
|
||||
sf->disable_wedge_search_var_thresh = 0;
|
||||
sf->disable_wedge_search_edge_thresh = 0;
|
||||
@@ -322,7 +321,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
// See aomedia:1778.
|
||||
// sf->adaptive_motion_search = 1;
|
||||
sf->recode_loop = ALLOW_RECODE_KFARFGF;
|
||||
sf->use_transform_domain_distortion = 1;
|
||||
sf->use_transform_domain_distortion = boosted ? 1 : 2;
|
||||
sf->use_accurate_subpel_search = USE_2_TAPS;
|
||||
sf->adaptive_rd_thresh = 2;
|
||||
sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
|
||||
@@ -333,24 +332,22 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
// TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
|
||||
// it with cpi->sf.disable_wedge_search_var_thresh.
|
||||
sf->disable_wedge_interintra_search = 1;
|
||||
sf->perform_coeff_opt = boosted ? 0 : 3;
|
||||
sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 3;
|
||||
}
|
||||
|
||||
if (speed >= 4) {
|
||||
sf->use_intra_txb_hash = 0;
|
||||
sf->use_mb_rd_hash = 0;
|
||||
sf->tx_type_search.fast_intra_tx_type_search = 1;
|
||||
sf->use_square_partition_only_threshold =
|
||||
boosted ? BLOCK_128X128 : BLOCK_4X4;
|
||||
sf->tx_size_search_method =
|
||||
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
|
||||
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
|
||||
sf->adaptive_pred_interp_filter = 0;
|
||||
sf->adaptive_mode_search = 1;
|
||||
sf->cb_partition_search = !boosted;
|
||||
sf->alt_ref_search_fp = 1;
|
||||
sf->skip_sharp_interp_filter_search = 1;
|
||||
sf->perform_coeff_opt = boosted ? 0 : 4;
|
||||
sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
|
||||
sf->adaptive_txb_search_level = boosted ? 2 : 3;
|
||||
}
|
||||
|
||||
if (speed >= 5) {
|
||||
@@ -400,6 +397,8 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
sf->mv.search_method = FAST_HEX;
|
||||
sf->partition_search_type = REFERENCE_PARTITION;
|
||||
sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
|
||||
// TODO(any): evaluate adaptive_mode_search=1 for speed 7 & 8
|
||||
sf->adaptive_mode_search = 2;
|
||||
}
|
||||
if (speed >= 8) {
|
||||
sf->mv.search_method = FAST_DIAMOND;
|
||||
@@ -408,12 +407,12 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
|
||||
}
|
||||
}
|
||||
|
||||
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
|
||||
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
|
||||
SPEED_FEATURES *const sf = &cpi->sf;
|
||||
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
|
||||
|
||||
if (oxcf->mode == GOOD) {
|
||||
set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
|
||||
set_good_speed_feature_framesize_dependent(cpi, sf, speed);
|
||||
}
|
||||
|
||||
if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
|
||||
@@ -427,7 +426,7 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
|
||||
cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
|
||||
}
|
||||
|
||||
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
|
||||
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
|
||||
AV1_COMMON *const cm = &cpi->common;
|
||||
SPEED_FEATURES *const sf = &cpi->sf;
|
||||
MACROBLOCK *const x = &cpi->td.mb;
|
||||
@@ -576,7 +575,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
|
||||
sf->perform_coeff_opt = 0;
|
||||
|
||||
if (oxcf->mode == GOOD)
|
||||
set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
|
||||
set_good_speed_features_framesize_independent(cpi, sf, speed);
|
||||
|
||||
if (!cpi->seq_params_locked) {
|
||||
cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;
|
||||
@@ -591,28 +590,31 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
|
||||
cpi->diamond_search_sad = av1_diamond_search_sad;
|
||||
|
||||
sf->allow_exhaustive_searches = 1;
|
||||
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
|
||||
|
||||
const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
|
||||
sf->exhaustive_searches_thresh = (1 << 24);
|
||||
else
|
||||
sf->exhaustive_searches_thresh = (1 << 25);
|
||||
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
|
||||
if (speed > 0)
|
||||
sf->max_exaustive_pct = good_quality_max_mesh_pct[mesh_speed];
|
||||
if (mesh_speed > 0)
|
||||
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
|
||||
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
|
||||
sf->mesh_patterns[i].range =
|
||||
good_quality_mesh_patterns[mesh_speed][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
good_quality_mesh_patterns[speed][i].interval;
|
||||
good_quality_mesh_patterns[mesh_speed][i].interval;
|
||||
}
|
||||
if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
|
||||
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
|
||||
cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
sf->mesh_patterns[i].range = intrabc_mesh_patterns[speed][i].range;
|
||||
sf->mesh_patterns[i].interval = intrabc_mesh_patterns[speed][i].interval;
|
||||
sf->mesh_patterns[i].range = intrabc_mesh_patterns[mesh_speed][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
intrabc_mesh_patterns[mesh_speed][i].interval;
|
||||
}
|
||||
sf->max_exaustive_pct = intrabc_max_mesh_pct[speed];
|
||||
sf->max_exaustive_pct = intrabc_max_mesh_pct[mesh_speed];
|
||||
}
|
||||
|
||||
// Slow quant, dct and trellis not worthwhile for first pass
|
||||
@@ -638,7 +640,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
|
||||
cpi->optimize_speed_feature =
|
||||
oxcf->pass != 1 ? sf->optimize_coefficients : NO_TRELLIS_OPT;
|
||||
// FIXME: trellis not very efficient for quantisation matrices
|
||||
if (cm->using_qmatrix) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
|
||||
if (oxcf->using_qm) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
|
||||
if (oxcf->disable_trellis_quant) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
|
||||
|
||||
x->min_partition_size = sf->default_min_partition_size;
|
||||
@@ -653,9 +655,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
|
||||
comp_type_rd_threshold_mul[sf->prune_comp_type_by_comp_avg];
|
||||
cpi->max_comp_type_rd_threshold_div =
|
||||
comp_type_rd_threshold_div[sf->prune_comp_type_by_comp_avg];
|
||||
int tx_domain_speed = (oxcf->speed >= MAX_TX_DOMAIN_EVAL_SPEED)
|
||||
? MAX_TX_DOMAIN_EVAL_SPEED
|
||||
: oxcf->speed;
|
||||
const int tx_domain_speed = AOMMIN(speed, MAX_TX_DOMAIN_EVAL_SPEED);
|
||||
cpi->tx_domain_dist_threshold = tx_domain_dist_thresholds[tx_domain_speed];
|
||||
|
||||
// assert ensures that coeff_opt_dist_thresholds is accessed correctly
|
||||
|
||||
+4
-2
@@ -656,8 +656,10 @@ typedef struct SPEED_FEATURES {
|
||||
|
||||
struct AV1_COMP;
|
||||
|
||||
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi);
|
||||
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi);
|
||||
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi,
|
||||
int speed);
|
||||
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi,
|
||||
int speed);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+9
-7
@@ -765,7 +765,8 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
|
||||
// Save input state
|
||||
uint8_t *input_buffer[MAX_MB_PLANE];
|
||||
int i;
|
||||
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
const int is_hbd = is_cur_buf_hbd(mbd);
|
||||
if (is_hbd) {
|
||||
predictor = CONVERT_TO_BYTEPTR(predictor16);
|
||||
} else {
|
||||
predictor = predictor8;
|
||||
@@ -887,20 +888,21 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
|
||||
const unsigned int w = plane ? mb_uv_width : BW;
|
||||
const unsigned int h = plane ? mb_uv_height : BH;
|
||||
|
||||
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
if (is_hbd) {
|
||||
highbd_apply_temporal_filter_self(pred, pred_stride, w, h,
|
||||
blk_fw[0], accum, cnt);
|
||||
else
|
||||
} else {
|
||||
apply_temporal_filter_self(pred, pred_stride, w, h, blk_fw[0],
|
||||
accum, cnt);
|
||||
}
|
||||
|
||||
pred += BLK_PELS;
|
||||
accum += BLK_PELS;
|
||||
cnt += BLK_PELS;
|
||||
}
|
||||
} else {
|
||||
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
int adj_strength = strength + 2 * (mbd->bd - 8);
|
||||
if (is_hbd) {
|
||||
const int adj_strength = strength + 2 * (mbd->bd - 8);
|
||||
|
||||
if (num_planes <= 1) {
|
||||
// Single plane case
|
||||
@@ -943,7 +945,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
|
||||
}
|
||||
|
||||
// Normalize filter output to produce AltRef frame
|
||||
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_hbd) {
|
||||
uint16_t *dst1_16;
|
||||
uint16_t *dst2_16;
|
||||
dst1 = cpi->alt_ref_buffer.y_buffer;
|
||||
@@ -1139,7 +1141,7 @@ static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
|
||||
MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
|
||||
struct lookahead_entry *buf = av1_lookahead_peek(cpi->lookahead, distance);
|
||||
double noiselevel;
|
||||
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (is_cur_buf_hbd(mbd)) {
|
||||
noiselevel = highbd_estimate_noise(
|
||||
buf->img.y_buffer, buf->img.y_crop_width, buf->img.y_crop_height,
|
||||
buf->img.y_stride, mbd->bd, EDGE_THRESHOLD);
|
||||
|
||||
+595
@@ -0,0 +1,595 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
#include "aom/aom_codec.h"
|
||||
|
||||
#include "av1/common/onyxc_int.h"
|
||||
#include "av1/common/reconintra.h"
|
||||
|
||||
#include "av1/encoder/encoder.h"
|
||||
#include "av1/encoder/reconinter_enc.h"
|
||||
|
||||
typedef struct GF_PICTURE {
|
||||
YV12_BUFFER_CONFIG *frame;
|
||||
int ref_frame[7];
|
||||
} GF_PICTURE;
|
||||
|
||||
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
|
||||
tran_low_t *qcoeff, tran_low_t *dqcoeff,
|
||||
TX_SIZE tx_size, int64_t *recon_error,
|
||||
int64_t *sse) {
|
||||
const struct macroblock_plane *const p = &x->plane[plane];
|
||||
const SCAN_ORDER *const scan_order = &av1_default_scan_orders[tx_size];
|
||||
uint16_t eob;
|
||||
int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
|
||||
const int shift = tx_size == TX_32X32 ? 0 : 2;
|
||||
|
||||
av1_quantize_fp_32x32(coeff, pix_num, p->zbin_QTX, p->round_fp_QTX,
|
||||
p->quant_fp_QTX, p->quant_shift_QTX, qcoeff, dqcoeff,
|
||||
p->dequant_QTX, &eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
|
||||
*recon_error = av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
|
||||
*recon_error = AOMMAX(*recon_error, 1);
|
||||
|
||||
*sse = (*sse) >> shift;
|
||||
*sse = AOMMAX(*sse, 1);
|
||||
}
|
||||
|
||||
static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
|
||||
TX_SIZE tx_size) {
|
||||
switch (tx_size) {
|
||||
case TX_8X8: aom_hadamard_8x8(src_diff, bw, coeff); break;
|
||||
case TX_16X16: aom_hadamard_16x16(src_diff, bw, coeff); break;
|
||||
case TX_32X32: aom_hadamard_32x32(src_diff, bw, coeff); break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs a full-pixel NSTEP search followed by the encoder's fractional-pel
// refinement for one block, starting from a zero motion vector.
//
//   cur_frame_buf  source pixels of the block (installed as plane 0 source).
//   ref_frame_buf  reference pixels at the same position (installed as plane 0
//                  prediction buffer).
//   stride         row stride used for both buffers.
//   bsize          block size being searched.
//   mi_row/mi_col  block position in MI units.
//
// Returns the value produced by cpi->find_fractional_mv_step().
// NOTE(review): callers in this file read the resulting vector from
// x->best_mv, so the searches presumably store it there - confirm.
static uint32_t motion_compensated_prediction(AV1_COMP *cpi, ThreadData *td,
                                              uint8_t *cur_frame_buf,
                                              uint8_t *ref_frame_buf,
                                              int stride, BLOCK_SIZE bsize,
                                              int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const int sad_per_bit = x->sadperbit16;
  // Snapshot of the MV window, restored after the full-pixel search.
  const MvLimits saved_mv_limits = x->mv_limits;
  const int step_param =
      AOMMIN(mv_sf->reduce_first_step_size, MAX_MVSEARCH_STEPS - 2);
  int cost_list[5];
  int distortion;
  uint32_t sse;

  MV ref_mv = { 0, 0 };
  MV ref_mv_full; /* full-pixel value of ref_mv */
  ref_mv_full.row = ref_mv.row >> 3;
  ref_mv_full.col = ref_mv.col >> 3;

  // Point the search at the caller's source and reference blocks.
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  av1_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, NSTEP, 0,
                        sad_per_bit, cond_cost_list(cpi, cost_list), &ref_mv,
                        INT_MAX, 0, (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0);

  /* restore UMV window */
  x->mv_limits = saved_mv_limits;

  const int blk_w = block_size_wide[bsize];
  const int blk_h = block_size_high[bsize];
  return cpi->find_fractional_mv_step(
      x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
      x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_iters_per_step,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, NULL,
      0, 0, blk_w, blk_h, 1, 1);
}
|
||||
|
||||
static void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
|
||||
struct scale_factors *sf, GF_PICTURE *gf_picture,
|
||||
int frame_idx, int16_t *src_diff, tran_low_t *coeff,
|
||||
tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
|
||||
int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
|
||||
YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
|
||||
int64_t *recon_error, int64_t *sse,
|
||||
TplDepStats *tpl_stats) {
|
||||
AV1_COMMON *cm = &cpi->common;
|
||||
ThreadData *td = &cpi->td;
|
||||
|
||||
const int bw = 4 << mi_size_wide_log2[bsize];
|
||||
const int bh = 4 << mi_size_high_log2[bsize];
|
||||
const int pix_num = bw * bh;
|
||||
int best_rf_idx = -1;
|
||||
int_mv best_mv;
|
||||
int64_t best_inter_cost = INT64_MAX;
|
||||
int64_t inter_cost;
|
||||
int rf_idx;
|
||||
const InterpFilters kernel =
|
||||
av1_make_interp_filters(EIGHTTAP_REGULAR, EIGHTTAP_REGULAR);
|
||||
|
||||
int64_t best_intra_cost = INT64_MAX;
|
||||
int64_t intra_cost;
|
||||
PREDICTION_MODE mode;
|
||||
int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
|
||||
MB_MODE_INFO mi_above, mi_left;
|
||||
|
||||
memset(tpl_stats, 0, sizeof(*tpl_stats));
|
||||
|
||||
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
|
||||
xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
|
||||
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
|
||||
xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
|
||||
xd->above_mbmi = (mi_row > 0) ? &mi_above : NULL;
|
||||
xd->left_mbmi = (mi_col > 0) ? &mi_left : NULL;
|
||||
|
||||
// Intra prediction search
|
||||
for (mode = DC_PRED; mode <= PAETH_PRED; ++mode) {
|
||||
uint8_t *src, *dst;
|
||||
int src_stride, dst_stride;
|
||||
|
||||
src = xd->cur_buf->y_buffer + mb_y_offset;
|
||||
src_stride = xd->cur_buf->y_stride;
|
||||
|
||||
dst = &predictor[0];
|
||||
dst_stride = bw;
|
||||
|
||||
xd->mi[0]->sb_type = bsize;
|
||||
xd->mi[0]->ref_frame[0] = INTRA_FRAME;
|
||||
|
||||
av1_predict_intra_block(
|
||||
cm, xd, block_size_wide[bsize], block_size_high[bsize], tx_size, mode,
|
||||
0, 0, FILTER_INTRA_MODES, src, src_stride, dst, dst_stride, 0, 0, 0);
|
||||
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
aom_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
|
||||
dst_stride, xd->bd);
|
||||
} else {
|
||||
aom_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
|
||||
dst_stride);
|
||||
}
|
||||
|
||||
wht_fwd_txfm(src_diff, bw, coeff, tx_size);
|
||||
|
||||
intra_cost = aom_satd(coeff, pix_num);
|
||||
|
||||
if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
|
||||
}
|
||||
|
||||
// Motion compensated prediction
|
||||
best_mv.as_int = 0;
|
||||
|
||||
(void)mb_y_offset;
|
||||
// Motion estimation column boundary
|
||||
x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND));
|
||||
x->mv_limits.col_max =
|
||||
((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND);
|
||||
|
||||
for (rf_idx = 0; rf_idx < 7; ++rf_idx) {
|
||||
if (ref_frame[rf_idx] == NULL) continue;
|
||||
|
||||
motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
|
||||
ref_frame[rf_idx]->y_buffer + mb_y_offset,
|
||||
xd->cur_buf->y_stride, bsize, mi_row, mi_col);
|
||||
|
||||
// TODO(jingning): Not yet support high bit-depth in the next three
|
||||
// steps.
|
||||
ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
|
||||
WarpTypesAllowed warp_types;
|
||||
memset(&warp_types, 0, sizeof(WarpTypesAllowed));
|
||||
|
||||
av1_build_inter_predictor(
|
||||
ref_frame[rf_idx]->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_stride,
|
||||
&predictor[0], bw, &x->best_mv.as_mv, sf, bw, bh, &conv_params, kernel,
|
||||
&warp_types, mi_col * MI_SIZE, mi_row * MI_SIZE, 0, 0, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE, mi_row * MI_SIZE, xd, 0);
|
||||
if (is_cur_buf_hbd(xd)) {
|
||||
aom_highbd_subtract_block(
|
||||
bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
|
||||
xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
|
||||
} else {
|
||||
aom_subtract_block(bh, bw, src_diff, bw,
|
||||
xd->cur_buf->y_buffer + mb_y_offset,
|
||||
xd->cur_buf->y_stride, &predictor[0], bw);
|
||||
}
|
||||
wht_fwd_txfm(src_diff, bw, coeff, tx_size);
|
||||
|
||||
inter_cost = aom_satd(coeff, pix_num);
|
||||
if (inter_cost < best_inter_cost) {
|
||||
best_rf_idx = rf_idx;
|
||||
best_inter_cost = inter_cost;
|
||||
best_mv.as_int = x->best_mv.as_int;
|
||||
get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
|
||||
sse);
|
||||
}
|
||||
}
|
||||
best_intra_cost = AOMMAX(best_intra_cost, 1);
|
||||
best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost);
|
||||
tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
|
||||
tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
|
||||
tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow;
|
||||
|
||||
tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
|
||||
tpl_stats->mv.as_int = best_mv.as_int;
|
||||
}
|
||||
|
||||
// Returns floor(ref_pos / bsize_pix) for a positive `bsize_pix`, i.e. the
// quotient rounded towards negative infinity instead of towards zero.
static int round_floor(int ref_pos, int bsize_pix) {
  return (ref_pos >= 0) ? ref_pos / bsize_pix
                        : -(1 + (-ref_pos - 1) / bsize_pix);
}
|
||||
|
||||
/*
 * Returns the overlap, in pixels, between a bsize block whose top-left corner
 * is at (ref_pos_row, ref_pos_col) and the grid-aligned block whose top-left
 * corner is at (grid_pos_row, grid_pos_col).
 *
 * `block` selects which of the four neighbouring grid blocks is meant:
 * bit 0 set -> the grid block lies to the right of the reference position,
 * bit 1 set -> the grid block lies below it. Any value outside 0..3 trips an
 * assert and yields 0 when asserts are compiled out.
 */
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                            int ref_pos_col, int block, BLOCK_SIZE bsize) {
  const int blk_w = 4 << mi_size_wide_log2[bsize];
  const int blk_h = 4 << mi_size_high_log2[bsize];
  int overlap_w, overlap_h;

  if (block < 0 || block > 3) {
    assert(0);
    return 0;
  }

  /* Horizontal extent: left-hand grid blocks (0, 2) vs right-hand (1, 3). */
  if (block & 1)
    overlap_w = ref_pos_col + blk_w - grid_pos_col;
  else
    overlap_w = grid_pos_col + blk_w - ref_pos_col;

  /* Vertical extent: upper grid blocks (0, 1) vs lower (2, 3). */
  if (block & 2)
    overlap_h = ref_pos_row + blk_h - grid_pos_row;
  else
    overlap_h = grid_pos_row + blk_h - ref_pos_row;

  return overlap_w * overlap_h;
}
|
||||
|
||||
/*
 * Propagates the dependency statistics of one block back onto the reference
 * frame it was predicted from.
 *
 * tpl_frame  base of the per-frame TplDepFrame array; indexed by the block's
 *            ref_frame_index.
 * tpl_stats  statistics of the block being propagated (read only here).
 * mi_row/mi_col  position of the block in MI units.
 * bsize      size of the block being propagated.
 *
 * The block is displaced by its full-pel motion vector; the displaced block
 * overlaps up to four grid-aligned blocks in the reference frame. Each of
 * those that starts inside the reference frame receives a share of mc_flow
 * and mc_ref_cost proportional to the overlap area.
 */
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  /* ref_frame_index is copied from GF_PICTURE::ref_frame[], where -1 marks an
   * unavailable reference. There is nothing to propagate to in that case and
   * the value must not be used as an array index. */
  if (tpl_stats->ref_frame_index < 0) return;

  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  const MV mv = tpl_stats->mv.as_mv;
  /* Motion vectors are stored in 1/8 pel; drop the fractional bits. */
  const int mv_row = mv.row >> 3;
  const int mv_col = mv.col >> 3;

  const int ref_pos_row = mi_row * MI_SIZE + mv_row;
  const int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << mi_size_wide_log2[bsize];
  const int bh = 4 << mi_size_high_log2[bsize];
  const int mi_height = mi_size_high[bsize];
  const int mi_width = mi_size_wide[bsize];
  const int pix_num = bw * bh;

  /* Top-left corner (in pixels) of the grid block containing the reference
   * position. */
  const int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  const int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  for (block = 0; block < 4; ++block) {
    const int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    const int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    /* Skip grid blocks that start outside the reference frame. */
    if (grid_pos_row < 0 || grid_pos_row >= ref_tpl_frame->mi_rows * MI_SIZE ||
        grid_pos_col < 0 || grid_pos_col >= ref_tpl_frame->mi_cols * MI_SIZE)
      continue;

    const int overlap_area = get_overlap_area(
        grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
    /* Validate before the value is used for weighting. */
    assert(overlap_area >= 0);

    const int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
    const int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

    const int64_t mc_flow = tpl_stats->mc_dep_cost -
                            (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                                tpl_stats->intra_cost;

    /* Both contributions are identical for every MI unit of this grid block,
     * so compute them once instead of once per unit. */
    const int64_t flow_share = (mc_flow * overlap_area) / pix_num;
    const int64_t ref_cost_share =
        ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
        pix_num;

    int idx, idy;

    for (idy = 0; idy < mi_height; ++idy) {
      for (idx = 0; idx < mi_width; ++idx) {
        TplDepStats *des_stats =
            &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                       (ref_mi_col + idx)];

        des_stats->mc_flow += flow_share;
        des_stats->mc_ref_cost += ref_cost_share;
      }
    }
  }
}
|
||||
|
||||
/*
 * Propagates the statistics of every MI unit covered by a bsize block at
 * (mi_row, mi_col). Each unit is handed to tpl_model_update_b() individually
 * as a BLOCK_4X4 block.
 *
 * tpl_frame is the base of the per-frame array; tpl_stats is the stats plane
 * of the frame being processed.
 * NOTE(review): the row pitch used to index tpl_stats is tpl_frame->stride,
 * i.e. the stride of the first array entry -- presumably every frame shares
 * the same stride; confirm against the allocation code.
 */
static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  const int row_end = mi_row + mi_size_high[bsize];
  const int col_end = mi_col + mi_size_wide[bsize];
  int row, col;

  for (row = mi_row; row < row_end; ++row) {
    for (col = mi_col; col < col_end; ++col) {
      TplDepStats *unit_stats = &tpl_stats[row * tpl_frame->stride + col];
      tpl_model_update_b(tpl_frame, unit_stats, row, col, BLOCK_4X4);
    }
  }
}
|
||||
|
||||
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize, int stride,
|
||||
const TplDepStats *src_stats) {
|
||||
const int mi_height = mi_size_high[bsize];
|
||||
const int mi_width = mi_size_wide[bsize];
|
||||
int idx, idy;
|
||||
|
||||
int64_t intra_cost = src_stats->intra_cost / (mi_height * mi_width);
|
||||
int64_t inter_cost = src_stats->inter_cost / (mi_height * mi_width);
|
||||
|
||||
TplDepStats *tpl_ptr;
|
||||
|
||||
intra_cost = AOMMAX(1, intra_cost);
|
||||
inter_cost = AOMMAX(1, inter_cost);
|
||||
|
||||
for (idy = 0; idy < mi_height; ++idy) {
|
||||
tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col];
|
||||
for (idx = 0; idx < mi_width; ++idx) {
|
||||
tpl_ptr->intra_cost = intra_cost;
|
||||
tpl_ptr->inter_cost = inter_cost;
|
||||
tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
|
||||
tpl_ptr->ref_frame_index = src_stats->ref_frame_index;
|
||||
tpl_ptr->mv.as_int = src_stats->mv.as_int;
|
||||
++tpl_ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Runs the temporal-dependency model for frame `frame_idx` of the group
 * described by gf_picture.
 *
 * The frame is scanned in 32x32 blocks. For each block mode_estimation()
 * produces intra/inter cost estimates, tpl_model_store() spreads them over
 * the block's MI units in cpi->tpl_stats[frame_idx], and tpl_model_update()
 * propagates the dependency onto the reference frames' statistics.
 *
 * Side effects visible here: marks the frame's stats as valid, overwrites
 * cm->base_qindex with the frame's base_qindex, re-initialises the quantizer
 * and the motion-search constants, and repoints xd->mi / xd->cur_buf.
 */
static void mc_flow_dispenser(AV1_COMP *cpi, GF_PICTURE *gf_picture,
                              int frame_idx) {
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
  YV12_BUFFER_CONFIG *ref_frame[7] = {
    NULL, NULL, NULL, NULL, NULL, NULL, NULL
  };
  const int num_ref_slots = (int)(sizeof(ref_frame) / sizeof(ref_frame[0]));

  AV1_COMMON *cm = &cpi->common;
  struct scale_factors sf;
  int rdmult, idx;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  int mi_row, mi_col;

  /* Scratch prediction buffers: one for high bit-depth, one for 8-bit. */
  DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
  uint8_t *predictor;
  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);

  const BLOCK_SIZE bsize = BLOCK_32X32;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int mi_height = mi_size_high[bsize];
  const int mi_width = mi_size_wide[bsize];
  int64_t recon_error, sse;

  /* Source and reference share dimensions, so this is an identity scale. */
  av1_setup_scale_factors_for_frame(
      &sf, this_frame->y_crop_width, this_frame->y_crop_height,
      this_frame->y_crop_width, this_frame->y_crop_height);

  /* Resolve reference frame pointers. A slot whose index is -1 is
   * unavailable and keeps its NULL pointer. */
  for (idx = 0; idx < num_ref_slots; ++idx) {
    const int rf_idx = gf_picture[frame_idx].ref_frame[idx];
    if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
  }

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  xd->cur_buf = this_frame;

  /* Pick the predictor buffer only once xd->cur_buf refers to the frame being
   * analysed: is_cur_buf_hbd() is presumed to inspect xd->cur_buf, which until
   * the assignment above still holds whatever an earlier stage left there. */
  predictor =
      is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor16) : predictor8;

  /* Get rd multiplier set up. */
  rdmult = (int)av1_compute_rd_mult(cpi, tpl_frame->base_qindex);
  if (rdmult < 1) rdmult = 1;
  set_error_per_bit(&cpi->td.mb, rdmult);
  av1_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);

  tpl_frame->is_valid = 1;

  cm->base_qindex = tpl_frame->base_qindex;
  av1_frame_init_quantizer(cpi);

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
    /* Motion estimation row boundary */
    x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * AOM_INTERP_EXTEND));
    x->mv_limits.row_max =
        (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * AOM_INTERP_EXTEND);
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
      TplDepStats tpl_stats;
      mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff,
                      qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size,
                      ref_frame, predictor, &recon_error, &sse, &tpl_stats);

      /* Motion flow dependency dispenser. */
      tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
                      tpl_frame->stride, &tpl_stats);

      tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
                       bsize);
    }
  }
}
|
||||
|
||||
static void init_gop_frames(AV1_COMP *cpi, GF_PICTURE *gf_picture,
|
||||
const GF_GROUP *gf_group, int *tpl_group_frames,
|
||||
const EncodeFrameInput *const frame_input) {
|
||||
AV1_COMMON *cm = &cpi->common;
|
||||
const SequenceHeader *const seq_params = &cm->seq_params;
|
||||
int frame_idx = 0;
|
||||
int i;
|
||||
int gld_index = -1;
|
||||
int alt_index = -1;
|
||||
int lst_index = -1;
|
||||
int extend_frame_count = 0;
|
||||
int pframe_qindex = cpi->tpl_stats[2].base_qindex;
|
||||
|
||||
RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
|
||||
int recon_frame_index[INTER_REFS_PER_FRAME + 1] = { -1, -1, -1, -1,
|
||||
-1, -1, -1, -1 };
|
||||
|
||||
// TODO(jingning): To be used later for gf frame type parsing.
|
||||
(void)gf_group;
|
||||
|
||||
for (i = 0; i < FRAME_BUFFERS && frame_idx < INTER_REFS_PER_FRAME + 1; ++i) {
|
||||
if (frame_bufs[i].ref_count == 0) {
|
||||
alloc_frame_mvs(cm, &frame_bufs[i]);
|
||||
if (aom_realloc_frame_buffer(
|
||||
&frame_bufs[i].buf, cm->width, cm->height,
|
||||
seq_params->subsampling_x, seq_params->subsampling_y,
|
||||
seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
|
||||
cm->byte_alignment, NULL, NULL, NULL))
|
||||
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
|
||||
"Failed to allocate frame buffer");
|
||||
|
||||
recon_frame_index[frame_idx] = i;
|
||||
++frame_idx;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < INTER_REFS_PER_FRAME + 1; ++i) {
|
||||
assert(recon_frame_index[i] >= 0);
|
||||
cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
|
||||
}
|
||||
|
||||
*tpl_group_frames = 0;
|
||||
|
||||
// Initialize Golden reference frame.
|
||||
gf_picture[0].frame = NULL;
|
||||
RefCntBuffer *ref_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
|
||||
if (ref_buf) gf_picture[0].frame = &ref_buf->buf;
|
||||
for (i = 0; i < 7; ++i) gf_picture[0].ref_frame[i] = -1;
|
||||
gld_index = 0;
|
||||
++*tpl_group_frames;
|
||||
|
||||
// Initialize ARF frame
|
||||
gf_picture[1].frame = frame_input->source;
|
||||
gf_picture[1].ref_frame[0] = gld_index;
|
||||
gf_picture[1].ref_frame[1] = lst_index;
|
||||
gf_picture[1].ref_frame[2] = alt_index;
|
||||
// TODO(yuec) Need o figure out full AV1 reference model
|
||||
for (i = 3; i < 7; ++i) gf_picture[1].ref_frame[i] = -1;
|
||||
alt_index = 1;
|
||||
++*tpl_group_frames;
|
||||
|
||||
// Initialize P frames
|
||||
for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
|
||||
struct lookahead_entry *buf =
|
||||
av1_lookahead_peek(cpi->lookahead, frame_idx - 2);
|
||||
|
||||
if (buf == NULL) break;
|
||||
|
||||
gf_picture[frame_idx].frame = &buf->img;
|
||||
gf_picture[frame_idx].ref_frame[0] = gld_index;
|
||||
gf_picture[frame_idx].ref_frame[1] = lst_index;
|
||||
gf_picture[frame_idx].ref_frame[2] = alt_index;
|
||||
for (i = 3; i < 7; ++i) gf_picture[frame_idx].ref_frame[i] = -1;
|
||||
|
||||
++*tpl_group_frames;
|
||||
lst_index = frame_idx;
|
||||
|
||||
if (frame_idx == cpi->rc.baseline_gf_interval + 1) break;
|
||||
}
|
||||
|
||||
gld_index = frame_idx;
|
||||
lst_index = AOMMAX(0, frame_idx - 1);
|
||||
alt_index = -1;
|
||||
++frame_idx;
|
||||
|
||||
// Extend two frames outside the current gf group.
|
||||
for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
|
||||
struct lookahead_entry *buf =
|
||||
av1_lookahead_peek(cpi->lookahead, frame_idx - 2);
|
||||
|
||||
if (buf == NULL) break;
|
||||
|
||||
cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
|
||||
|
||||
gf_picture[frame_idx].frame = &buf->img;
|
||||
gf_picture[frame_idx].ref_frame[0] = gld_index;
|
||||
gf_picture[frame_idx].ref_frame[1] = lst_index;
|
||||
gf_picture[frame_idx].ref_frame[2] = alt_index;
|
||||
for (i = 3; i < 7; ++i) gf_picture[frame_idx].ref_frame[i] = -1;
|
||||
lst_index = frame_idx;
|
||||
++*tpl_group_frames;
|
||||
++extend_frame_count;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Resets every entry of cpi->tpl_stats: zeroes width * height elements of the
 * entry's statistics plane and clears its is_valid flag.
 */
static void init_tpl_stats(AV1_COMP *cpi) {
  int remaining = MAX_LAG_BUFFERS;
  TplDepFrame *frame_stats = &cpi->tpl_stats[0];

  for (; remaining > 0; --remaining, ++frame_stats) {
    memset(frame_stats->tpl_stats_ptr, 0,
           frame_stats->height * frame_stats->width *
               sizeof(*frame_stats->tpl_stats_ptr));
    frame_stats->is_valid = 0;
  }
}
|
||||
|
||||
void av1_tpl_setup_stats(AV1_COMP *cpi,
|
||||
const EncodeFrameInput *const frame_input) {
|
||||
GF_PICTURE gf_picture[MAX_LAG_BUFFERS];
|
||||
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
|
||||
int tpl_group_frames = 0;
|
||||
int frame_idx;
|
||||
|
||||
init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames, frame_input);
|
||||
|
||||
init_tpl_stats(cpi);
|
||||
|
||||
// Backward propagation from tpl_group_frames to 1.
|
||||
for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx)
|
||||
mc_flow_dispenser(cpi, gf_picture, frame_idx);
|
||||
}
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_AV1_ENCODER_TPL_MODEL_H_
|
||||
#define AOM_AV1_ENCODER_TPL_MODEL_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Builds the temporal-dependency (TPL) statistics held in cpi->tpl_stats for
 * the current golden-frame group. frame_input->source is used as the group's
 * ARF picture; the remaining pictures are read from the encoder's lookahead
 * queue.
 */
void av1_tpl_setup_stats(AV1_COMP *cpi,
|
||||
const EncodeFrameInput *const frame_input);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_AV1_ENCODER_TPL_MODEL_H_
|
||||
+9
-777
@@ -1407,6 +1407,13 @@ static INLINE void fadst16x16_new_avx2(const __m256i *input, __m256i *output,
|
||||
output[14] = x1[15];
|
||||
output[15] = x1[0];
|
||||
}
|
||||
|
||||
/*
 * Multiply-accumulates each adjacent pair of 16-bit lanes of `a` against the
 * constant pair built from (scale, 1 << (NewSqrt2Bits - 1)), then shifts each
 * 32-bit sum right arithmetically by NewSqrt2Bits.
 * NOTE(review): presumably callers interleave data with the constant 1 so the
 * second term acts as the rounding offset -- confirm at the call sites.
 */
static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
  const int rounding_offset = 1 << (NewSqrt2Bits - 1);
  const __m256i scale_and_round = pair_set_w16_epi16(scale, rounding_offset);

  return _mm256_srai_epi32(_mm256_madd_epi16(a, scale_and_round),
                           NewSqrt2Bits);
}
|
||||
|
||||
static INLINE void fidentity16x16_new_avx2(const __m256i *input,
|
||||
__m256i *output, int8_t cos_bit) {
|
||||
(void)cos_bit;
|
||||
@@ -1990,781 +1997,6 @@ static void lowbd_fwd_txfm2d_64x16_avx2(const int16_t *input, int32_t *output,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * 16-bit butterfly on two 256-bit vectors.
 *
 * in0 and in1 are interleaved lane by lane; each interleaved pair is
 * multiply-accumulated with the weight pairs in w0 and w1, offset by
 * __rounding, shifted right by cos_bit and packed back to 16 bits with signed
 * saturation.
 *
 * Results: *out0 / *out2 receive the low / high 128-bit halves of the w0
 * product, *out1 / *out3 the low / high halves of the w1 product.
 */
void btf_16_avx2(__m256i w0, __m256i w1, __m256i in0, __m256i in1,
                 __m128i *out0, __m128i *out1, __m128i *out2, __m128i *out3,
                 __m256i __rounding, int8_t cos_bit) {
  const __m256i pairs_lo = _mm256_unpacklo_epi16(in0, in1);
  const __m256i pairs_hi = _mm256_unpackhi_epi16(in0, in1);

  /* (pairs . w0 + rounding) >> cos_bit */
  const __m256i res0_lo = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_lo, w0), __rounding), cos_bit);
  const __m256i res0_hi = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_hi, w0), __rounding), cos_bit);

  /* (pairs . w1 + rounding) >> cos_bit */
  const __m256i res1_lo = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_lo, w1), __rounding), cos_bit);
  const __m256i res1_hi = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_hi, w1), __rounding), cos_bit);

  const __m256i packed0 = _mm256_packs_epi32(res0_lo, res0_hi);
  const __m256i packed1 = _mm256_packs_epi32(res1_lo, res1_hi);

  *out0 = _mm256_castsi256_si128(packed0);
  *out1 = _mm256_castsi256_si128(packed1);
  *out2 = _mm256_extractf128_si256(packed0, 0x01);
  *out3 = _mm256_extractf128_si256(packed1, 0x01);
}
|
||||
static INLINE void fdct8x8_new_avx2(const __m256i *input, __m256i *output,
|
||||
int8_t cos_bit) {
|
||||
const int32_t *cospi = cospi_arr(cos_bit);
|
||||
const __m256i __rounding = _mm256_set1_epi32(1 << (cos_bit - 1));
|
||||
|
||||
__m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
|
||||
__m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
|
||||
__m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
|
||||
__m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
|
||||
__m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
|
||||
__m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
|
||||
__m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
|
||||
__m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
|
||||
__m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
|
||||
|
||||
// stage 1
|
||||
__m256i x1[8];
|
||||
x1[0] = _mm256_adds_epi16(input[0], input[7]);
|
||||
x1[7] = _mm256_subs_epi16(input[0], input[7]);
|
||||
x1[1] = _mm256_adds_epi16(input[1], input[6]);
|
||||
x1[6] = _mm256_subs_epi16(input[1], input[6]);
|
||||
x1[2] = _mm256_adds_epi16(input[2], input[5]);
|
||||
x1[5] = _mm256_subs_epi16(input[2], input[5]);
|
||||
x1[3] = _mm256_adds_epi16(input[3], input[4]);
|
||||
x1[4] = _mm256_subs_epi16(input[3], input[4]);
|
||||
|
||||
// stage 2
|
||||
__m256i x2[8];
|
||||
x2[0] = _mm256_adds_epi16(x1[0], x1[3]);
|
||||
x2[3] = _mm256_subs_epi16(x1[0], x1[3]);
|
||||
x2[1] = _mm256_adds_epi16(x1[1], x1[2]);
|
||||
x2[2] = _mm256_subs_epi16(x1[1], x1[2]);
|
||||
x2[4] = x1[4];
|
||||
btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], __rounding,
|
||||
cos_bit);
|
||||
x2[5] = x1[5];
|
||||
x2[6] = x1[6];
|
||||
x2[7] = x1[7];
|
||||
|
||||
// stage 3
|
||||
__m256i x3[8];
|
||||
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x2[0], &x2[1], __rounding,
|
||||
cos_bit);
|
||||
x3[0] = x2[0];
|
||||
x3[1] = x2[1];
|
||||
btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x2[2], &x2[3], __rounding,
|
||||
cos_bit);
|
||||
x3[2] = x2[2];
|
||||
x3[3] = x2[3];
|
||||
x3[4] = _mm256_adds_epi16(x2[4], x2[5]);
|
||||
x3[5] = _mm256_subs_epi16(x2[4], x2[5]);
|
||||
x3[6] = _mm256_subs_epi16(x2[7], x2[6]);
|
||||
x3[7] = _mm256_adds_epi16(x2[7], x2[6]);
|
||||
|
||||
// stage 4
|
||||
__m256i x4[8];
|
||||
x4[0] = x3[0];
|
||||
x4[1] = x3[1];
|
||||
x4[2] = x3[2];
|
||||
x4[3] = x3[3];
|
||||
btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x3[4], &x3[7], __rounding,
|
||||
cos_bit);
|
||||
x4[4] = x3[4];
|
||||
x4[7] = x3[7];
|
||||
btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x3[5], &x3[6], __rounding,
|
||||
cos_bit);
|
||||
x4[5] = x3[5];
|
||||
x4[6] = x3[6];
|
||||
// stage 5
|
||||
output[0] = x4[0];
|
||||
output[1] = x4[4];
|
||||
output[2] = x4[2];
|
||||
output[3] = x4[6];
|
||||
output[4] = x4[1];
|
||||
output[5] = x4[5];
|
||||
output[6] = x4[3];
|
||||
output[7] = x4[7];
|
||||
}
|
||||
static INLINE void fadst8x8_new_avx2(const __m256i *input, __m256i *output,
|
||||
int8_t cos_bit) {
|
||||
const int32_t *cospi = cospi_arr(cos_bit);
|
||||
const __m256i __zero = _mm256_setzero_si256();
|
||||
const __m256i __rounding = _mm256_set1_epi32(1 << (cos_bit - 1));
|
||||
|
||||
__m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
|
||||
__m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
|
||||
__m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
|
||||
__m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
|
||||
__m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
|
||||
__m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
|
||||
__m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
|
||||
__m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
|
||||
__m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
|
||||
__m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
|
||||
__m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
|
||||
__m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
|
||||
__m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
|
||||
|
||||
// stage 1
|
||||
__m256i x1[8];
|
||||
x1[0] = input[0];
|
||||
x1[1] = _mm256_subs_epi16(__zero, input[7]);
|
||||
x1[2] = _mm256_subs_epi16(__zero, input[3]);
|
||||
x1[3] = input[4];
|
||||
x1[4] = _mm256_subs_epi16(__zero, input[1]);
|
||||
x1[5] = input[6];
|
||||
x1[6] = input[2];
|
||||
x1[7] = _mm256_subs_epi16(__zero, input[5]);
|
||||
|
||||
// stage 2
|
||||
__m256i x2[8];
|
||||
x2[0] = x1[0];
|
||||
x2[1] = x1[1];
|
||||
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], __rounding,
|
||||
cos_bit);
|
||||
x2[2] = x1[2];
|
||||
x2[3] = x1[3];
|
||||
x2[4] = x1[4];
|
||||
x2[5] = x1[5];
|
||||
btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], __rounding,
|
||||
cos_bit);
|
||||
x2[6] = x1[6];
|
||||
x2[7] = x1[7];
|
||||
|
||||
// stage 3
|
||||
__m256i x3[8];
|
||||
x3[0] = _mm256_adds_epi16(x2[0], x2[2]);
|
||||
x3[2] = _mm256_subs_epi16(x2[0], x2[2]);
|
||||
x3[1] = _mm256_adds_epi16(x2[1], x2[3]);
|
||||
x3[3] = _mm256_subs_epi16(x2[1], x2[3]);
|
||||
x3[4] = _mm256_adds_epi16(x2[4], x2[6]);
|
||||
x3[6] = _mm256_subs_epi16(x2[4], x2[6]);
|
||||
x3[5] = _mm256_adds_epi16(x2[5], x2[7]);
|
||||
x3[7] = _mm256_subs_epi16(x2[5], x2[7]);
|
||||
|
||||
// stage 4
|
||||
__m256i x4[8];
|
||||
x4[0] = x3[0];
|
||||
x4[1] = x3[1];
|
||||
x4[2] = x3[2];
|
||||
x4[3] = x3[3];
|
||||
btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x3[4], &x3[5], __rounding,
|
||||
cos_bit);
|
||||
x4[4] = x3[4];
|
||||
x4[5] = x3[5];
|
||||
btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x3[6], &x3[7], __rounding,
|
||||
cos_bit);
|
||||
x4[6] = x3[6];
|
||||
x4[7] = x3[7];
|
||||
|
||||
// stage 5
|
||||
__m256i x5[8];
|
||||
x5[0] = _mm256_adds_epi16(x4[0], x4[4]);
|
||||
x5[4] = _mm256_subs_epi16(x4[0], x4[4]);
|
||||
x5[1] = _mm256_adds_epi16(x4[1], x4[5]);
|
||||
x5[5] = _mm256_subs_epi16(x4[1], x4[5]);
|
||||
x5[2] = _mm256_adds_epi16(x4[2], x4[6]);
|
||||
x5[6] = _mm256_subs_epi16(x4[2], x4[6]);
|
||||
x5[3] = _mm256_adds_epi16(x4[3], x4[7]);
|
||||
x5[7] = _mm256_subs_epi16(x4[3], x4[7]);
|
||||
|
||||
// stage 6
|
||||
__m256i x6[8];
|
||||
btf_16_w16_avx2(cospi_p04_p60, cospi_p60_m04, &x5[0], &x5[1], __rounding,
|
||||
cos_bit);
|
||||
x6[0] = x5[0];
|
||||
x6[1] = x5[1];
|
||||
btf_16_w16_avx2(cospi_p20_p44, cospi_p44_m20, &x5[2], &x5[3], __rounding,
|
||||
cos_bit);
|
||||
x6[2] = x5[2];
|
||||
x6[3] = x5[3];
|
||||
btf_16_w16_avx2(cospi_p36_p28, cospi_p28_m36, &x5[4], &x5[5], __rounding,
|
||||
cos_bit);
|
||||
x6[4] = x5[4];
|
||||
x6[5] = x5[5];
|
||||
btf_16_w16_avx2(cospi_p52_p12, cospi_p12_m52, &x5[6], &x5[7], __rounding,
|
||||
cos_bit);
|
||||
x6[6] = x5[6];
|
||||
x6[7] = x5[7];
|
||||
|
||||
// stage 7
|
||||
output[0] = x6[1];
|
||||
output[1] = x6[6];
|
||||
output[2] = x6[3];
|
||||
output[3] = x6[4];
|
||||
output[4] = x6[5];
|
||||
output[5] = x6[2];
|
||||
output[6] = x6[7];
|
||||
output[7] = x6[0];
|
||||
}
|
||||
/*
 * 8-point identity transform: every 16-bit lane of each of the eight rows is
 * doubled with signed saturation. cos_bit is unused and only present to match
 * the signature of the other 8-point transforms.
 */
static INLINE void fidentity8x8_new_avx2(const __m256i *input, __m256i *output,
                                         int8_t cos_bit) {
  int row;
  (void)cos_bit;

  for (row = 0; row < 8; ++row)
    output[row] = _mm256_adds_epi16(input[row], input[row]);
}
|
||||
static INLINE void fdct8x16_new_avx2(const __m128i *input, __m128i *output,
|
||||
int8_t cos_bit) {
|
||||
const int32_t *cospi = cospi_arr(cos_bit);
|
||||
const __m256i __rounding_256 = _mm256_set1_epi32(1 << (cos_bit - 1));
|
||||
const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
|
||||
__m128i temp0, temp1, temp2, temp3;
|
||||
__m256i in0, in1;
|
||||
__m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
|
||||
__m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
|
||||
__m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
|
||||
__m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
|
||||
__m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
|
||||
__m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
|
||||
__m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
|
||||
__m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
|
||||
__m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
|
||||
__m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
|
||||
__m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
|
||||
__m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
|
||||
__m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
|
||||
__m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
|
||||
__m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
|
||||
__m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
|
||||
__m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
|
||||
__m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
|
||||
|
||||
__m256i cospi_arr[12];
|
||||
|
||||
cospi_arr[0] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m32_p32),
|
||||
cospi_m32_p32, 0x1);
|
||||
cospi_arr[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
|
||||
cospi_p32_p32, 0x1);
|
||||
cospi_arr[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
|
||||
cospi_p48_p16, 0x1);
|
||||
cospi_arr[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
|
||||
cospi_m16_p48, 0x1);
|
||||
cospi_arr[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m16_p48),
|
||||
cospi_m48_m16, 0x1);
|
||||
cospi_arr[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_p16),
|
||||
cospi_m16_p48, 0x1);
|
||||
cospi_arr[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p56_p08),
|
||||
cospi_p24_p40, 0x1);
|
||||
cospi_arr[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m08_p56),
|
||||
cospi_m40_p24, 0x1);
|
||||
cospi_arr[8] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p60_p04),
|
||||
cospi_p28_p36, 0x1);
|
||||
cospi_arr[9] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m04_p60),
|
||||
cospi_m36_p28, 0x1);
|
||||
cospi_arr[10] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p44_p20),
|
||||
cospi_p12_p52, 0x1);
|
||||
cospi_arr[11] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m20_p44),
|
||||
cospi_m52_p12, 0x1);
|
||||
|
||||
__m256i x[8];
|
||||
x[0] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[0]), input[1], 0x1);
|
||||
x[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[15]), input[14],
|
||||
0x1);
|
||||
x[2] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[2]), input[3], 0x1);
|
||||
x[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[13]), input[12],
|
||||
0x1);
|
||||
x[4] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[5]), input[4], 0x1);
|
||||
x[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[10]), input[11],
|
||||
0x1);
|
||||
x[6] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[7]), input[6], 0x1);
|
||||
x[7] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[8]), input[9], 0x1);
|
||||
|
||||
// stage 1
|
||||
__m256i x1[16];
|
||||
x1[0] = _mm256_adds_epi16(x[0], x[1]);
|
||||
x1[7] = _mm256_subs_epi16(x[0], x[1]);
|
||||
x1[1] = _mm256_adds_epi16(x[2], x[3]);
|
||||
x1[6] = _mm256_subs_epi16(x[2], x[3]);
|
||||
x1[2] = _mm256_adds_epi16(x[4], x[5]);
|
||||
x1[5] = _mm256_subs_epi16(x[4], x[5]);
|
||||
x1[3] = _mm256_adds_epi16(x[6], x[7]);
|
||||
x1[4] = _mm256_subs_epi16(x[6], x[7]);
|
||||
|
||||
// stage 2
|
||||
__m256i x2[8];
|
||||
x2[0] = _mm256_adds_epi16(x1[0], x1[3]);
|
||||
x2[7] = _mm256_subs_epi16(x1[0], x1[3]);
|
||||
x2[1] = _mm256_adds_epi16(x1[1], x1[2]);
|
||||
x2[6] = _mm256_subs_epi16(x1[1], x1[2]);
|
||||
x2[2] = x1[4];
|
||||
x2[3] = x1[7];
|
||||
btf_16_avx2(cospi_arr[0], cospi_arr[1], x1[5], x1[6], &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x2[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp0, 0x1);
|
||||
x2[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp3), temp1, 0x1);
|
||||
|
||||
// stage 3
|
||||
__m256i x3[8];
|
||||
x2[1] = _mm256_permute4x64_epi64(x2[1], 0x4e);
|
||||
x3[0] = _mm256_adds_epi16(x2[0], x2[1]);
|
||||
x3[1] = _mm256_subs_epi16(x2[0], x2[1]);
|
||||
x3[2] = _mm256_blend_epi32(x2[7], x2[6], 0xf0);
|
||||
btf_16_sse2(cospi_m32_p32, cospi_p32_p32, _mm256_castsi256_si128(x2[6]),
|
||||
_mm256_extractf128_si256(x2[7], 0x01), temp0, temp1);
|
||||
x3[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp1), temp0, 0x1);
|
||||
x3[3] = _mm256_adds_epi16(x2[2], x2[4]);
|
||||
x3[4] = _mm256_subs_epi16(x2[2], x2[4]);
|
||||
x3[5] = _mm256_adds_epi16(x2[3], x2[5]);
|
||||
x3[6] = _mm256_subs_epi16(x2[3], x2[5]);
|
||||
|
||||
// stage 4
|
||||
__m256i x4[8];
|
||||
in0 = _mm256_blend_epi32(x3[0], x3[1], 0xf0);
|
||||
in1 = _mm256_permute2f128_si256(x3[0], x3[1], 0x21);
|
||||
btf_16_avx2(cospi_arr[2], cospi_arr[3], in0, in1, &output[0], &output[8],
|
||||
&output[4], &output[12], __rounding_256, cos_bit);
|
||||
x4[2] = _mm256_adds_epi16(x3[2], x3[7]);
|
||||
x4[3] = _mm256_subs_epi16(x3[2], x3[7]);
|
||||
x4[4] = _mm256_permute2f128_si256(x3[3], x3[4], 0x20);
|
||||
x4[5] = _mm256_permute2f128_si256(x3[6], x3[5], 0x20);
|
||||
in0 = _mm256_permute2f128_si256(x3[3], x3[4], 0x31);
|
||||
in1 = _mm256_permute2f128_si256(x3[5], x3[6], 0x31);
|
||||
btf_16_avx2(cospi_arr[4], cospi_arr[5], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x4[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp2, 0x1);
|
||||
x4[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp3), temp1, 0x1);
|
||||
|
||||
// stage 5
|
||||
__m256i x5[8];
|
||||
in0 = _mm256_permute2f128_si256(x4[2], x4[3], 0x31);
|
||||
in1 = _mm256_permute2f128_si256(x4[2], x4[3], 0x20);
|
||||
btf_16_avx2(cospi_arr[6], cospi_arr[7], in0, in1, &output[2], &output[14],
|
||||
&output[10], &output[6], __rounding_256, cos_bit);
|
||||
x5[4] = _mm256_adds_epi16(x4[4], x4[6]);
|
||||
x5[5] = _mm256_subs_epi16(x4[4], x4[6]);
|
||||
x5[6] = _mm256_adds_epi16(x4[5], x4[7]);
|
||||
x5[7] = _mm256_subs_epi16(x4[5], x4[7]);
|
||||
|
||||
// stage 6
|
||||
in0 = _mm256_permute2f128_si256(x5[4], x5[5], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x5[6], x5[7], 0x31);
|
||||
btf_16_avx2(cospi_arr[8], cospi_arr[9], in0, in1, &output[1], &output[15],
|
||||
&output[9], &output[7], __rounding_256, cos_bit);
|
||||
in0 = _mm256_permute2f128_si256(x5[5], x5[4], 0x31);
|
||||
in1 = _mm256_permute2f128_si256(x5[7], x5[6], 0x20);
|
||||
btf_16_avx2(cospi_arr[10], cospi_arr[11], in0, in1, &output[5], &output[11],
|
||||
&output[13], &output[3], __rounding_256, cos_bit);
|
||||
}
|
||||
static INLINE void fadst8x16_new_avx2(const __m128i *input, __m128i *output,
|
||||
int8_t cos_bit) {
|
||||
const int32_t *cospi = cospi_arr(cos_bit);
|
||||
const __m256i __zero = _mm256_setzero_si256();
|
||||
const __m256i __rounding_256 = _mm256_set1_epi32(1 << (cos_bit - 1));
|
||||
__m256i in0, in1;
|
||||
__m128i temp0, temp1, temp2, temp3;
|
||||
|
||||
__m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
|
||||
__m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
|
||||
__m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
|
||||
__m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
|
||||
__m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
|
||||
__m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
|
||||
__m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
|
||||
__m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
|
||||
__m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
|
||||
__m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
|
||||
__m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
|
||||
__m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
|
||||
__m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
|
||||
__m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
|
||||
__m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
|
||||
__m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
|
||||
__m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
|
||||
__m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
|
||||
__m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
|
||||
__m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
|
||||
__m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
|
||||
__m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
|
||||
__m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
|
||||
__m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
|
||||
__m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
|
||||
__m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
|
||||
__m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
|
||||
|
||||
__m256i cospi_arr[20];
|
||||
|
||||
cospi_arr[0] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
|
||||
cospi_p32_p32, 0x1);
|
||||
cospi_arr[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
|
||||
cospi_p32_m32, 0x1);
|
||||
cospi_arr[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_p32),
|
||||
cospi_p32_p32, 0x1);
|
||||
cospi_arr[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p32_m32),
|
||||
cospi_p32_m32, 0x1);
|
||||
cospi_arr[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p16_p48),
|
||||
cospi_m48_p16, 0x1);
|
||||
cospi_arr[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_m16),
|
||||
cospi_p16_p48, 0x1);
|
||||
cospi_arr[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p16_p48),
|
||||
cospi_m48_p16, 0x1);
|
||||
cospi_arr[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p48_m16),
|
||||
cospi_p16_p48, 0x1);
|
||||
cospi_arr[8] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p08_p56),
|
||||
cospi_p40_p24, 0x1);
|
||||
cospi_arr[9] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p56_m08),
|
||||
cospi_p24_m40, 0x1);
|
||||
cospi_arr[10] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_m56_p08),
|
||||
cospi_m24_p40, 0x1);
|
||||
cospi_arr[11] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p08_p56),
|
||||
cospi_p40_p24, 0x1);
|
||||
cospi_arr[12] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p02_p62),
|
||||
cospi_p10_p54, 0x1);
|
||||
cospi_arr[13] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p62_m02),
|
||||
cospi_p54_m10, 0x1);
|
||||
cospi_arr[14] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p18_p46),
|
||||
cospi_p26_p38, 0x1);
|
||||
cospi_arr[15] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p46_m18),
|
||||
cospi_p38_m26, 0x1);
|
||||
cospi_arr[16] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p34_p30),
|
||||
cospi_p42_p22, 0x1);
|
||||
cospi_arr[17] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p30_m34),
|
||||
cospi_p22_m42, 0x1);
|
||||
cospi_arr[18] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p50_p14),
|
||||
cospi_p58_p06, 0x1);
|
||||
cospi_arr[19] = _mm256_insertf128_si256(_mm256_castsi128_si256(cospi_p14_m50),
|
||||
cospi_p06_m58, 0x1);
|
||||
|
||||
__m256i x[8];
|
||||
x[0] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[0]), input[4], 0x1);
|
||||
x[1] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[2]), input[6], 0x1);
|
||||
x[2] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[8]), input[12], 0x1);
|
||||
x[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(input[10]), input[14],
|
||||
0x1);
|
||||
x[4] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[1]), input[9], 0x1);
|
||||
x[5] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[3]), input[11], 0x1);
|
||||
x[6] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[5]), input[13], 0x1);
|
||||
x[7] =
|
||||
_mm256_insertf128_si256(_mm256_castsi128_si256(input[7]), input[15], 0x1);
|
||||
|
||||
// stage 1
|
||||
__m256i x1[8];
|
||||
x1[0] = x[0];
|
||||
x1[1] = _mm256_subs_epi16(__zero, x[7]);
|
||||
x1[2] = x[2];
|
||||
x1[3] = _mm256_subs_epi16(__zero, x[5]);
|
||||
x1[4] = _mm256_subs_epi16(__zero, x[4]);
|
||||
x1[5] = x[3];
|
||||
x1[6] = _mm256_subs_epi16(__zero, x[6]);
|
||||
x1[7] = x[1];
|
||||
|
||||
// stage 2
|
||||
__m256i x2[8];
|
||||
x2[0] = _mm256_blend_epi32(x1[0], x1[1], 0xf0);
|
||||
x2[3] = _mm256_blend_epi32(x1[3], x1[2], 0xf0);
|
||||
x2[4] = _mm256_blend_epi32(x1[4], x1[5], 0xf0);
|
||||
x2[7] = _mm256_blend_epi32(x1[7], x1[6], 0xf0);
|
||||
in0 = _mm256_blend_epi32(x1[1], x1[0], 0xf0);
|
||||
in1 = _mm256_blend_epi32(x1[2], x1[3], 0xf0);
|
||||
btf_16_avx2(cospi_arr[0], cospi_arr[1], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x2[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x2[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
in0 = _mm256_permute2f128_si256(x1[7], x1[6], 0x21);
|
||||
in1 = _mm256_permute2f128_si256(x1[4], x1[5], 0x21);
|
||||
btf_16_avx2(cospi_arr[2], cospi_arr[3], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x2[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x2[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
|
||||
// stage 3
|
||||
__m256i x3[8];
|
||||
x3[0] = _mm256_adds_epi16(x2[0], x2[1]);
|
||||
x3[1] = _mm256_subs_epi16(x2[0], x2[1]);
|
||||
x3[2] = _mm256_adds_epi16(x2[3], x2[2]);
|
||||
x3[3] = _mm256_subs_epi16(x2[3], x2[2]);
|
||||
x3[4] = _mm256_adds_epi16(x2[4], x2[5]);
|
||||
x3[5] = _mm256_subs_epi16(x2[4], x2[5]);
|
||||
x3[6] = _mm256_adds_epi16(x2[7], x2[6]);
|
||||
x3[7] = _mm256_subs_epi16(x2[7], x2[6]);
|
||||
|
||||
// stage 4
|
||||
__m256i x4[8];
|
||||
x4[0] = x3[0];
|
||||
x4[1] = x3[1];
|
||||
x4[4] = x3[4];
|
||||
x4[5] = x3[5];
|
||||
in0 = _mm256_permute2f128_si256(x3[2], x3[3], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x3[2], x3[3], 0x31);
|
||||
btf_16_avx2(cospi_arr[4], cospi_arr[5], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x4[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x4[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
in0 = _mm256_permute2f128_si256(x3[6], x3[7], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x3[6], x3[7], 0x31);
|
||||
btf_16_avx2(cospi_arr[6], cospi_arr[7], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x4[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x4[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
|
||||
// stage 5
|
||||
__m256i x5[8];
|
||||
x5[0] = _mm256_adds_epi16(x4[0], x4[2]);
|
||||
x5[1] = _mm256_subs_epi16(x4[0], x4[2]);
|
||||
x5[2] = _mm256_adds_epi16(x4[1], x4[3]);
|
||||
x5[3] = _mm256_subs_epi16(x4[1], x4[3]);
|
||||
x5[4] = _mm256_adds_epi16(x4[4], x4[6]);
|
||||
x5[5] = _mm256_subs_epi16(x4[4], x4[6]);
|
||||
x5[6] = _mm256_adds_epi16(x4[5], x4[7]);
|
||||
x5[7] = _mm256_subs_epi16(x4[5], x4[7]);
|
||||
|
||||
// stage 6
|
||||
__m256i x6[8];
|
||||
x6[0] = x5[0];
|
||||
x6[1] = x5[2];
|
||||
x6[2] = x5[1];
|
||||
x6[3] = x5[3];
|
||||
in0 = _mm256_permute2f128_si256(x5[4], x5[6], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x5[4], x5[6], 0x31);
|
||||
btf_16_avx2(cospi_arr[8], cospi_arr[9], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x6[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x6[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
in0 = _mm256_permute2f128_si256(x5[5], x5[7], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x5[5], x5[7], 0x31);
|
||||
btf_16_avx2(cospi_arr[10], cospi_arr[11], in0, in1, &temp0, &temp1, &temp2,
|
||||
&temp3, __rounding_256, cos_bit);
|
||||
x6[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp0), temp1, 0x1);
|
||||
x6[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(temp2), temp3, 0x1);
|
||||
|
||||
// stage 7
|
||||
__m256i x7[8];
|
||||
x7[0] = _mm256_adds_epi16(x6[0], x6[4]);
|
||||
x7[1] = _mm256_subs_epi16(x6[0], x6[4]);
|
||||
x7[2] = _mm256_adds_epi16(x6[1], x6[5]);
|
||||
x7[3] = _mm256_subs_epi16(x6[1], x6[5]);
|
||||
x7[4] = _mm256_adds_epi16(x6[2], x6[6]);
|
||||
x7[5] = _mm256_subs_epi16(x6[2], x6[6]);
|
||||
x7[6] = _mm256_adds_epi16(x6[3], x6[7]);
|
||||
x7[7] = _mm256_subs_epi16(x6[3], x6[7]);
|
||||
|
||||
// stage 8
|
||||
in0 = _mm256_permute2f128_si256(x7[0], x7[2], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x7[0], x7[2], 0x31);
|
||||
btf_16_avx2(cospi_arr[12], cospi_arr[13], in0, in1, &output[15], &output[0],
|
||||
&output[13], &output[2], __rounding_256, cos_bit);
|
||||
in0 = _mm256_permute2f128_si256(x7[4], x7[6], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x7[4], x7[6], 0x31);
|
||||
btf_16_avx2(cospi_arr[14], cospi_arr[15], in0, in1, &output[11], &output[4],
|
||||
&output[9], &output[6], __rounding_256, cos_bit);
|
||||
in0 = _mm256_permute2f128_si256(x7[1], x7[3], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x7[1], x7[3], 0x31);
|
||||
btf_16_avx2(cospi_arr[16], cospi_arr[17], in0, in1, &output[7], &output[8],
|
||||
&output[5], &output[10], __rounding_256, cos_bit);
|
||||
in0 = _mm256_permute2f128_si256(x7[5], x7[7], 0x20);
|
||||
in1 = _mm256_permute2f128_si256(x7[5], x7[7], 0x31);
|
||||
btf_16_avx2(cospi_arr[18], cospi_arr[19], in0, in1, &output[3], &output[12],
|
||||
&output[1], &output[14], __rounding_256, cos_bit);
|
||||
}
|
||||
static INLINE void fidentity8x16_new_avx2(const __m128i *input, __m128i *output,
|
||||
int8_t cos_bit) {
|
||||
(void)cos_bit;
|
||||
const __m256i one = _mm256_set1_epi16(1);
|
||||
__m256i temp;
|
||||
for (int i = 0; i < 16; i += 2) {
|
||||
temp = _mm256_insertf128_si256(_mm256_castsi128_si256(input[i]),
|
||||
input[i + 1], 0x1);
|
||||
const __m256i a_lo = _mm256_unpacklo_epi16(temp, one);
|
||||
const __m256i a_hi = _mm256_unpackhi_epi16(temp, one);
|
||||
const __m256i b_lo = scale_round_avx2(a_lo, 2 * NewSqrt2);
|
||||
const __m256i b_hi = scale_round_avx2(a_hi, 2 * NewSqrt2);
|
||||
temp = _mm256_packs_epi32(b_lo, b_hi);
|
||||
output[i] = _mm256_castsi256_si128(temp);
|
||||
output[i + 1] = _mm256_extractf128_si256(temp, 0x1);
|
||||
}
|
||||
}
|
||||
static const transform_1d_avx2 row_txfm8x16_arr[TX_TYPES] = {
|
||||
fdct8x8_new_avx2, // DCT_DCT
|
||||
fdct8x8_new_avx2, // ADST_DCT
|
||||
fadst8x8_new_avx2, // DCT_ADST
|
||||
fadst8x8_new_avx2, // ADST_ADST
|
||||
fdct8x8_new_avx2, // FLIPADST_DCT
|
||||
fadst8x8_new_avx2, // DCT_FLIPADST
|
||||
fadst8x8_new_avx2, // FLIPADST_FLIPADST
|
||||
fadst8x8_new_avx2, // ADST_FLIPADST
|
||||
fadst8x8_new_avx2, // FLIPADST_ADST
|
||||
fidentity8x8_new_avx2, // IDTX
|
||||
fidentity8x8_new_avx2, // V_DCT
|
||||
fdct8x8_new_avx2, // H_DCT
|
||||
fidentity8x8_new_avx2, // V_ADST
|
||||
fadst8x8_new_avx2, // H_ADST
|
||||
fidentity8x8_new_avx2, // V_FLIPADST
|
||||
fadst8x8_new_avx2 // H_FLIPADST
|
||||
};
|
||||
|
||||
static const transform_1d_sse2 col_txfm8x16_arr[TX_TYPES] = {
|
||||
fdct8x16_new_avx2, // DCT_DCT
|
||||
fadst8x16_new_avx2, // ADST_DCT
|
||||
fdct8x16_new_avx2, // DCT_ADST
|
||||
fadst8x16_new_avx2, // ADST_ADST
|
||||
fadst8x16_new_avx2, // FLIPADST_DCT
|
||||
fdct8x16_new_avx2, // DCT_FLIPADST
|
||||
fadst8x16_new_avx2, // FLIPADST_FLIPADST
|
||||
fadst8x16_new_avx2, // ADST_FLIPADST
|
||||
fadst8x16_new_avx2, // FLIPADST_ADST
|
||||
fidentity8x16_new_avx2, // IDTX
|
||||
fdct8x16_new_avx2, // V_DCT
|
||||
fidentity8x16_new_avx2, // H_DCT
|
||||
fadst8x16_new_avx2, // V_ADST
|
||||
fidentity8x16_new_avx2, // H_ADST
|
||||
fadst8x16_new_avx2, // V_FLIPADST
|
||||
fidentity8x16_new_avx2 // H_FLIPADST
|
||||
};
|
||||
static const transform_1d_avx2 col_txfm16x8_arr[TX_TYPES] = {
|
||||
fdct8x8_new_avx2, // DCT_DCT
|
||||
fadst8x8_new_avx2, // ADST_DCT
|
||||
fdct8x8_new_avx2, // DCT_ADST
|
||||
fadst8x8_new_avx2, // ADST_ADST
|
||||
fadst8x8_new_avx2, // FLIPADST_DCT
|
||||
fdct8x8_new_avx2, // DCT_FLIPADST
|
||||
fadst8x8_new_avx2, // FLIPADST_FLIPADST
|
||||
fadst8x8_new_avx2, // ADST_FLIPADST
|
||||
fadst8x8_new_avx2, // FLIPADST_ADST
|
||||
fidentity8x8_new_avx2, // IDTX
|
||||
fdct8x8_new_avx2, // V_DCT
|
||||
fidentity8x8_new_avx2, // H_DCT
|
||||
fadst8x8_new_avx2, // V_ADST
|
||||
fidentity8x8_new_avx2, // H_ADST
|
||||
fadst8x8_new_avx2, // V_FLIPADST
|
||||
fidentity8x8_new_avx2, // H_FLIPADST
|
||||
};
|
||||
|
||||
static const transform_1d_sse2 row_txfm16x8_arr[TX_TYPES] = {
|
||||
fdct8x16_new_avx2, // DCT_DCT
|
||||
fdct8x16_new_avx2, // ADST_DCT
|
||||
fadst8x16_new_avx2, // DCT_ADST
|
||||
fadst8x16_new_avx2, // ADST_ADST
|
||||
fdct8x16_new_avx2, // FLIPADST_DCT
|
||||
fadst8x16_new_avx2, // DCT_FLIPADST
|
||||
fadst8x16_new_avx2, // FLIPADST_FLIPADST
|
||||
fadst8x16_new_avx2, // ADST_FLIPADST
|
||||
fadst8x16_new_avx2, // FLIPADST_ADST
|
||||
fidentity8x16_new_avx2, // IDTX
|
||||
fidentity8x16_new_avx2, // V_DCT
|
||||
fdct8x16_new_avx2, // H_DCT
|
||||
fidentity8x16_new_avx2, // V_ADST
|
||||
fadst8x16_new_avx2, // H_ADST
|
||||
fidentity8x16_new_avx2, // V_FLIPADST
|
||||
fadst8x16_new_avx2 // H_FLIPADST
|
||||
};
|
||||
// 2-D forward transform for blocks 8 wide and 16 tall.
//
// Pipeline: load 16 rows (through the flipping loader when ud_flip is set),
// round-shift, column transform, round-shift, transpose the two 8x8 halves,
// optionally run them through flip_buf_sse2() when lr_flip is set, pack the
// halves into 256-bit registers, row transform, round-shift, transpose and
// store as 32-bit coefficients.
//
//   input   - 16-bit source samples, `stride` elements per row.
//   output  - 32-bit coefficient destination.
//   tx_type - selects the column/row kernels and the flip configuration.
//   bd      - unused.
void lowbd_fwd_txfm2d_8x16_avx2(const int16_t *input, int32_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  (void)bd;
  const int width = 8;
  const int height = 16;

  __m128i col_buf[16], tr_buf[16];
  __m256i row_buf[8];

  const int8_t *shift = fwd_txfm_shift_ls[TX_8X16];
  const int txw_idx = get_txw_idx(TX_8X16);
  const int txh_idx = get_txh_idx(TX_8X16);
  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
  const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
  const transform_1d_avx2 row_txfm = row_txfm8x16_arr[tx_type];

  int ud_flip, lr_flip;
  get_flip_cfg(tx_type, &ud_flip, &lr_flip);

  // Column pass on 128-bit registers.
  if (!ud_flip) {
    load_buffer_16bit_to_16bit(input, stride, col_buf, height);
  } else {
    load_buffer_16bit_to_16bit_flip(input, stride, col_buf, height);
  }
  round_shift_16bit(col_buf, height, shift[0]);
  col_txfm(col_buf, col_buf, cos_bit_col);
  round_shift_16bit(col_buf, height, shift[1]);
  transpose_16bit_8x8(col_buf, tr_buf);
  transpose_16bit_8x8(col_buf + 8, tr_buf + 8);

  // Choose the two 8-register halves fed to the row pass. When lr_flip is
  // set the flipped copies are written into col_buf, which is free again.
  __m128i *half_lo = tr_buf;
  __m128i *half_hi = tr_buf + width;
  if (lr_flip) {
    flip_buf_sse2(tr_buf, col_buf, width);
    flip_buf_sse2(tr_buf + width, col_buf + 8, width);
    half_lo = col_buf;
    half_hi = col_buf + 8;
  }

  // Row pass on 256-bit registers.
  pack_reg(half_lo, half_hi, row_buf);
  row_txfm(row_buf, row_buf, cos_bit_row);
  round_shift_16bit_w16_avx2(row_buf, width, shift[2]);
  transpose_16bit_16x8_avx2(row_buf, row_buf);
  store_rect_buffer_16bit_to_32bit_w8_avx2(row_buf, output, width, 8);
}
|
||||
// 2-D forward transform for blocks 16 wide and 8 tall.
//
// Pipeline: load the left and right 8-column halves (through the flipping
// loader when ud_flip is set), pack them into 256-bit registers, round-shift,
// column transform, round-shift, transpose, unpack to 128-bit registers,
// optionally run them through flip_buf_sse2() when lr_flip is set, row
// transform, round-shift, then transpose and store each 8x8 half as 32-bit
// coefficients.
//
//   input   - 16-bit source samples, `stride` elements per row.
//   output  - 32-bit coefficient destination.
//   tx_type - selects the column/row kernels and the flip configuration.
//   bd      - unused.
void lowbd_fwd_txfm2d_16x8_avx2(const int16_t *input, int32_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  (void)bd;
  const int width = 16;
  const int height = 8;

  __m128i load_buf[16], tr_buf[16];
  __m256i col_buf[8];

  const int8_t *shift = fwd_txfm_shift_ls[TX_16X8];
  const int txw_idx = get_txw_idx(TX_16X8);
  const int txh_idx = get_txh_idx(TX_16X8);
  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
  const transform_1d_avx2 col_txfm = col_txfm16x8_arr[tx_type];
  const transform_1d_sse2 row_txfm = row_txfm16x8_arr[tx_type];

  int ud_flip, lr_flip;
  get_flip_cfg(tx_type, &ud_flip, &lr_flip);

  // Load columns 0-7 into load_buf[0..7] and columns 8-15 into
  // load_buf[8..15].
  if (!ud_flip) {
    load_buffer_16bit_to_16bit(input, stride, load_buf, height);
    load_buffer_16bit_to_16bit(input + 8, stride, load_buf + 8, height);
  } else {
    load_buffer_16bit_to_16bit_flip(input, stride, load_buf, height);
    load_buffer_16bit_to_16bit_flip(input + 8, stride, load_buf + 8, height);
  }

  // Column pass on 256-bit registers.
  pack_reg(load_buf, load_buf + 8, col_buf);
  round_shift_16bit_w16_avx2(col_buf, height, shift[0]);
  col_txfm(col_buf, col_buf, cos_bit_col);
  round_shift_16bit_w16_avx2(col_buf, height, shift[1]);
  transpose_16bit_16x8_avx2(col_buf, col_buf);
  extract_reg(col_buf, tr_buf);

  // When lr_flip is set the flipped copy goes into load_buf, which is free
  // again at this point.
  __m128i *row_buf = tr_buf;
  if (lr_flip) {
    row_buf = load_buf;
    flip_buf_sse2(tr_buf, row_buf, width);
  }

  // Row pass on 128-bit registers, then emit the two 8x8 halves.
  row_txfm(row_buf, row_buf, cos_bit_row);
  round_shift_16bit(row_buf, width, shift[2]);
  transpose_16bit_8x8(row_buf, row_buf);
  store_rect_buffer_16bit_to_32bit_w8(row_buf, output, width, height);
  transpose_16bit_8x8(row_buf + 8, row_buf + 8);
  store_rect_buffer_16bit_to_32bit_w8(row_buf + 8, output + 8, width, height);
}
|
||||
static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
|
||||
av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform
|
||||
av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform
|
||||
@@ -2773,8 +2005,8 @@ static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
|
||||
lowbd_fwd_txfm2d_64x64_avx2, // 64x64 transform
|
||||
av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform
|
||||
av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform
|
||||
lowbd_fwd_txfm2d_8x16_avx2, // 8x16 transform
|
||||
lowbd_fwd_txfm2d_16x8_avx2, // 16x8 transform
|
||||
av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform
|
||||
av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform
|
||||
lowbd_fwd_txfm2d_16x32_avx2, // 16x32 transform
|
||||
lowbd_fwd_txfm2d_32x16_avx2, // 32x16 transform
|
||||
lowbd_fwd_txfm2d_32x64_avx2, // 32x64 transform
|
||||
|
||||
+3
-5
@@ -101,8 +101,6 @@ set_aom_config_var(CONFIG_DENOISE 1 NUMBER
|
||||
"Denoise/noise modeling support in encoder.")
|
||||
set_aom_config_var(CONFIG_FILEOPTIONS 1 NUMBER
|
||||
"Enables encoder config file support.")
|
||||
set_aom_config_var(CONFIG_FIX_GF_LENGTH 1 NUMBER
|
||||
"Fix the GF length if possible")
|
||||
set_aom_config_var(CONFIG_INSPECTION 0 NUMBER "Enables bitstream inspection.")
|
||||
set_aom_config_var(CONFIG_INTERNAL_STATS 0 NUMBER
|
||||
"Enables internal encoder stats.")
|
||||
@@ -118,8 +116,6 @@ set_aom_config_var(DECODE_HEIGHT_LIMIT 0 NUMBER "Set limit for decode height.")
|
||||
set_aom_config_var(DECODE_WIDTH_LIMIT 0 NUMBER "Set limit for decode width.")
|
||||
|
||||
# AV1 experiment flags.
|
||||
set_aom_config_var(CONFIG_COLLECT_INTER_MODE_RD_STATS 1 NUMBER
|
||||
"AV1 experiment flag.")
|
||||
set_aom_config_var(CONFIG_SPEED_STATS 0 NUMBER "AV1 experiment flag.")
|
||||
set_aom_config_var(CONFIG_COLLECT_RD_STATS 0 NUMBER "AV1 experiment flag.")
|
||||
set_aom_config_var(CONFIG_DIST_8X8 0 NUMBER "AV1 experiment flag.")
|
||||
@@ -132,7 +128,9 @@ set_aom_config_var(CONFIG_2PASS_PARTITION_SEARCH_LVL 1 NUMBER
|
||||
set_aom_config_var(CONFIG_SHARP_SETTINGS 0 NUMBER "AV1 experiment flag.")
|
||||
set_aom_config_var(CONFIG_ONE_PASS_SVM 0 NUMBER "AV1 experiment flag.")
|
||||
set_aom_config_var(CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1 NUMBER
|
||||
"Disable full_pixel_motion_search_based_split on BLOCK_8X8")
|
||||
"Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
|
||||
set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0 NUMBER
|
||||
"Collect stats on partition decisions.")
|
||||
|
||||
#
|
||||
# Variables in this section control optional features of the build system.
|
||||
|
||||
+5
@@ -149,6 +149,11 @@ const AvxInterface *get_aom_encoder_by_name(const char *name) {
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Encoder interface for large-scale-tile encoding: the AV1 encoder tagged with LST_FOURCC.
|
||||
static const AvxInterface aom_lst_encoder = { "av1", LST_FOURCC,
|
||||
&aom_codec_av1_cx };
|
||||
// Returns a pointer to the statically allocated aom_lst_encoder interface.
const AvxInterface *get_aom_lst_encoder(void) { return &aom_lst_encoder; }
|
||||
#endif // CONFIG_AV1_ENCODER
|
||||
|
||||
#if CONFIG_AV1_DECODER
|
||||
|
||||
+4
@@ -85,6 +85,9 @@ enum {
|
||||
NV12, // Tile output in NV12 format.
|
||||
} UENUM1BYTE(OUTPUT_FORMAT);
|
||||
|
||||
// The fourcc for large_scale_tile encoding is "LSTC".
|
||||
#define LST_FOURCC 0x4354534c
|
||||
|
||||
struct FileTypeDetectionBuffer {
|
||||
char buf[4];
|
||||
size_t buf_read;
|
||||
@@ -150,6 +153,7 @@ typedef struct AvxInterface {
|
||||
int get_aom_encoder_count(void);
|
||||
const AvxInterface *get_aom_encoder_by_index(int i);
|
||||
const AvxInterface *get_aom_encoder_by_name(const char *name);
|
||||
const AvxInterface *get_aom_lst_encoder(void);
|
||||
|
||||
int get_aom_decoder_count(void);
|
||||
const AvxInterface *get_aom_decoder_by_index(int i);
|
||||
|
||||
+4
@@ -121,3 +121,7 @@ FILE *aom_video_reader_get_file(AvxVideoReader *reader) {
|
||||
const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader) {
|
||||
return &reader->info;
|
||||
}
|
||||
|
||||
// Overwrites the codec fourcc stored in the reader's info struct (the struct
// returned by aom_video_reader_get_info()). `reader` is dereferenced without
// a NULL check.
void aom_video_reader_set_fourcc(AvxVideoReader *reader, uint32_t fourcc) {
|
||||
reader->info.codec_fourcc = fourcc;
|
||||
}
|
||||
|
||||
+3
@@ -50,6 +50,9 @@ FILE *aom_video_reader_get_file(AvxVideoReader *reader);
|
||||
// Fills AvxVideoInfo with information from opened video file.
|
||||
const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader);
|
||||
|
||||
// Overrides the codec fourcc stored in the reader's AvxVideoInfo.
|
||||
void aom_video_reader_set_fourcc(AvxVideoReader *reader, uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
+4
@@ -75,3 +75,7 @@ int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Overwrites the codec fourcc stored in the writer's info struct. `writer`
// is dereferenced without a NULL check.
void aom_video_writer_set_fourcc(AvxVideoWriter *writer, uint32_t fourcc) {
|
||||
writer->info.codec_fourcc = fourcc;
|
||||
}
|
||||
|
||||
+2
@@ -37,6 +37,8 @@ void aom_video_writer_close(AvxVideoWriter *writer);
|
||||
// Writes frame bytes to the file.
|
||||
int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
|
||||
size_t size, int64_t pts);
|
||||
// Overrides the codec fourcc stored in the writer's info struct.
|
||||
void aom_video_writer_set_fourcc(AvxVideoWriter *writer, uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+72
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*
|
||||
* See build_av1_dec_fuzzer.sh for building instructions.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "aom/aom_decoder.h"
|
||||
#include "aom/aomdx.h"
|
||||
#include "aom_ports/mem_ops.h"
|
||||
#include "common/ivfdec.h"
|
||||
|
||||
// Deleter for the std::unique_ptr<FILE, ...> in LLVMFuzzerTestOneInput;
// fclose()'s return value is discarded.
static void close_file(FILE *file) { fclose(file); }
|
||||
|
||||
// C-linkage usage_exit(): terminates the process with EXIT_FAILURE.
// NOTE(review): presumably defined here because the tools_common.o linked
// into the fuzzer expects the program to provide it; confirm.
extern "C" void usage_exit(void) { exit(EXIT_FAILURE); }
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
std::unique_ptr<FILE, decltype(&close_file)> file(
|
||||
fmemopen((void *)data, size, "rb"), &close_file);
|
||||
if (file == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char header[32];
|
||||
if (fread(header, 1, 32, file.get()) != 32) {
|
||||
return 0;
|
||||
}
|
||||
const AvxInterface *decoder = get_aom_decoder_by_name("av1");
|
||||
if (decoder == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
aom_codec_ctx_t codec;
|
||||
// Set thread count in the range [1, 64].
|
||||
const unsigned int threads = (header[0] & 0x3f) + 1;
|
||||
aom_codec_dec_cfg_t cfg = { threads, 0, 0, CONFIG_LOWBITDEPTH };
|
||||
if (aom_codec_dec_init(&codec, decoder->codec_interface(), &cfg, 0)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *buffer = nullptr;
|
||||
size_t buffer_size = 0;
|
||||
size_t frame_size = 0;
|
||||
while (!ivf_read_frame(file.get(), &buffer, &frame_size, &buffer_size,
|
||||
nullptr)) {
|
||||
const aom_codec_err_t err =
|
||||
aom_codec_decode(&codec, buffer, frame_size, nullptr);
|
||||
static_cast<void>(err);
|
||||
aom_codec_iter_t iter = nullptr;
|
||||
aom_image_t *img = nullptr;
|
||||
while ((img = aom_codec_get_frame(&codec, &iter)) != nullptr) {
|
||||
}
|
||||
}
|
||||
aom_codec_destroy(&codec);
|
||||
free(buffer);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019, Alliance for Open Media. All rights reserved
|
||||
#
|
||||
# This source code is subject to the terms of the BSD 2 Clause License and
|
||||
# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
# was not distributed with this source code in the LICENSE file, you can
|
||||
# obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
# Media Patent License 1.0 was not distributed with this source code in the
|
||||
# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
#
|
||||
###############################################################################
|
||||
# Fuzzer for libaom decoder.
|
||||
# ==========================
|
||||
# Requirements
|
||||
# ---------------------
|
||||
# Clang6.0 or above (must support -fsanitize=fuzzer)
|
||||
#
|
||||
# References:
|
||||
# ---------------------
|
||||
# http://llvm.org/docs/LibFuzzer.html
|
||||
# https://github.com/google/oss-fuzz
|
||||
#
|
||||
# Steps to build / run
|
||||
# ---------------------
|
||||
|
||||
set -eu
|
||||
|
||||
# Have a copy of AOM and a build directory ready.
|
||||
if [[ $# -ne 2 ]]; then
|
||||
echo "Pass in the AOM source tree as first argument, and a build directory "
|
||||
echo "as the second argument. The AOM source tree can be obtained via: "
|
||||
echo " git clone https://aomedia.googlesource.com/aom"
|
||||
exit 2
|
||||
fi
|
||||
# The script runs under `set -u`, so a bare "$CC" / "$CXX" would abort with
# "unbound variable" before the help text is printed. The ${VAR:-} form
# expands to an empty string when the variable is unset, which lets the
# checks below report the problem and exit with status 2.
if [[ -z "${CC:-}" ]]; then
  echo "Set the CC environment variable to point to your C compiler."
  exit 2
fi
if [[ -z "${CXX:-}" ]]; then
  echo "Set the CXX environment variable to point to your C++ compiler."
  exit 2
fi
|
||||
|
||||
AOM_DIR=$1
|
||||
BUILD_DIR=$2
|
||||
# Run CMake with address sanitizer enabled and build the codec.
|
||||
# Enable DO_RANGE_CHECK_CLAMP to suppress the noise of integer overflows
|
||||
# in the transform functions. Also set memory limits.
|
||||
EXTRA_C_FLAGS='-DDO_RANGE_CHECK_CLAMP=1 -DAOM_MAX_ALLOCABLE_MEMORY=1073741824'
|
||||
cd "${BUILD_DIR}"
|
||||
cmake "${AOM_DIR}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCONFIG_PIC=1 \
|
||||
-DCONFIG_SCALABILITY=0 -DCONFIG_LOWBITDEPTH=1 -DCONFIG_AV1_ENCODER=0 \
|
||||
-DENABLE_EXAMPLES=0 -DENABLE_DOCS=0 -DENABLE_TESTS=0 -DCONFIG_SIZE_LIMIT=1 \
|
||||
-DDECODE_HEIGHT_LIMIT=12288 -DDECODE_WIDTH_LIMIT=12288 \
|
||||
-DAOM_EXTRA_C_FLAGS="${EXTRA_C_FLAGS}" \
|
||||
-DAOM_EXTRA_CXX_FLAGS="${EXTRA_C_FLAGS}" -DSANITIZE=address
|
||||
|
||||
# Build the codec.
|
||||
make -j$(nproc)
|
||||
|
||||
# Build some libaom utils that are not part of the core lib.
|
||||
# Paths are quoted so source/build directories containing spaces work, as in
# the cd and cmake invocations earlier in this script. $CC and $CXX stay
# unquoted so that values carrying extra flags still word-split.
$CC -std=c99 -c -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  "${AOM_DIR}/common/ivfdec.c" -o "${BUILD_DIR}/ivfdec.o"

$CC -std=c99 -c -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  "${AOM_DIR}/common/tools_common.c" -o "${BUILD_DIR}/tools_common.o"

# Build the av1 fuzzer
$CXX -std=c++11 -DDECODER=av1 -I"${AOM_DIR}" -I"${BUILD_DIR}" \
  -fsanitize=fuzzer -Wl,--start-group \
  "${AOM_DIR}/examples/av1_dec_fuzzer.cc" -o "${BUILD_DIR}/av1_dec_fuzzer" \
  "${BUILD_DIR}/libaom.a" "${BUILD_DIR}/ivfdec.o" \
  "${BUILD_DIR}/tools_common.o" \
  -Wl,--end-group
|
||||
|
||||
echo "Fuzzer built at ${BUILD_DIR}/av1_dec_fuzzer."
|
||||
echo "Create a corpus directory, copy IVF files in there, and run:"
|
||||
echo " av1_dec_fuzzer CORPUS_DIR"
|
||||
@@ -211,6 +211,8 @@ int main(int argc, char **argv) {
|
||||
num_references = (int)strtol(argv[3], NULL, 0);
|
||||
info = aom_video_reader_get_info(reader);
|
||||
|
||||
aom_video_reader_set_fourcc(reader, AV1_FOURCC);
|
||||
|
||||
// The writer to write out ivf file in tile list OBU, which can be decoded by
|
||||
// AV1 decoder.
|
||||
writer = aom_video_writer_open(argv[2], kContainerIVF, info);
|
||||
|
||||
+4
-2
@@ -188,8 +188,10 @@ int main(int argc, char **argv) {
|
||||
|
||||
info = aom_video_reader_get_info(reader);
|
||||
|
||||
decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
|
||||
if (!decoder) die("Unknown input codec.");
|
||||
if (info->codec_fourcc == LST_FOURCC)
|
||||
decoder = get_aom_decoder_by_fourcc(AV1_FOURCC);
|
||||
else
|
||||
die("Unknown input codec.");
|
||||
printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
|
||||
|
||||
if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
|
||||
|
||||
@@ -397,6 +397,10 @@ static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
|
||||
for (i = 0; i < reference_image_num; i++) aom_img_free(&reference_images[i]);
|
||||
|
||||
if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
|
||||
|
||||
// Modify large_scale_file fourcc.
|
||||
if (cfg->large_scale_tile == 1)
|
||||
aom_video_writer_set_fourcc(writer, LST_FOURCC);
|
||||
aom_video_writer_close(writer);
|
||||
|
||||
printf("\nSecond pass complete. Processed %d frames.\n", frame_count);
|
||||
|
||||
+1
-64
@@ -287,67 +287,6 @@ void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
|
||||
}
|
||||
}
|
||||
}
|
||||
void AV1FwdTxfm2dSpeedTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
|
||||
TxfmParam param;
|
||||
memset(¶m, 0, sizeof(param));
|
||||
const int rows = tx_size_high[tx_size];
|
||||
const int cols = tx_size_wide[tx_size];
|
||||
const int num_loops = 1000000 / (rows * cols);
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
const int bd = 8;
|
||||
for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
|
||||
if (libaom_test::IsTxSizeTypeValid(
|
||||
tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
|
||||
if (ref_func != NULL) {
|
||||
DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
|
||||
DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
|
||||
DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
|
||||
int input_stride = 64;
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
for (int r = 0; r < rows; ++r) {
|
||||
for (int c = 0; c < cols; ++c) {
|
||||
input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
|
||||
}
|
||||
}
|
||||
|
||||
param.tx_type = (TX_TYPE)tx_type;
|
||||
param.tx_size = (TX_SIZE)tx_size;
|
||||
param.tx_set_type = EXT_TX_SET_ALL16;
|
||||
param.bd = bd;
|
||||
|
||||
aom_usec_timer ref_timer, test_timer;
|
||||
|
||||
aom_usec_timer_start(&ref_timer);
|
||||
for (int i = 0; i < num_loops; ++i) {
|
||||
ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
|
||||
}
|
||||
aom_usec_timer_mark(&ref_timer);
|
||||
const int elapsed_time_c =
|
||||
static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
|
||||
|
||||
aom_usec_timer_start(&test_timer);
|
||||
for (int i = 0; i < num_loops; ++i) {
|
||||
target_func(input, output, input_stride, &param);
|
||||
}
|
||||
aom_usec_timer_mark(&test_timer);
|
||||
const int elapsed_time_simd =
|
||||
static_cast<int>(aom_usec_timer_elapsed(&test_timer));
|
||||
|
||||
printf(
|
||||
"txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
|
||||
"gain=%d \n",
|
||||
tx_size, tx_type, elapsed_time_c, elapsed_time_simd,
|
||||
(elapsed_time_c / elapsed_time_simd));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef ::testing::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
|
||||
|
||||
@@ -356,9 +295,7 @@ class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
|
||||
TEST_P(AV1FwdTxfm2dTest, match) {
|
||||
AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
|
||||
}
|
||||
TEST_P(AV1FwdTxfm2dTest, DISABLED_Speed) {
|
||||
AV1FwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
|
||||
}
|
||||
|
||||
using ::testing::Combine;
|
||||
using ::testing::Values;
|
||||
using ::testing::ValuesIn;
|
||||
|
||||
+3
@@ -411,6 +411,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TX_16X16, TYPE_B, AOM_BITS_8),
|
||||
make_tuple(&aom_quantize_b_32x32_c,
|
||||
&aom_quantize_b_32x32_ssse3, TX_32X32, TYPE_B,
|
||||
AOM_BITS_8),
|
||||
make_tuple(&aom_quantize_b_64x64_c,
|
||||
&aom_quantize_b_64x64_ssse3, TX_64X64, TYPE_B,
|
||||
AOM_BITS_8)));
|
||||
|
||||
#endif // HAVE_SSSE3 && ARCH_X86_64
|
||||
|
||||
Vendored
+1
-1
@@ -297,7 +297,7 @@ class ResizeInternalTestLarge : public ResizeTest {
|
||||
|
||||
virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
|
||||
if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
|
||||
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.5);
|
||||
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 3.0);
|
||||
}
|
||||
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
|
||||
Reference in New Issue
Block a user