diff --git a/media/libvpx/Makefile.in b/media/libvpx/Makefile.in index 2f5eab073..ae715cb50 100644 --- a/media/libvpx/Makefile.in +++ b/media/libvpx/Makefile.in @@ -288,6 +288,16 @@ CSRCS += \ vp9_variance_sse2.c \ variance_mmx.c \ variance_sse2.c \ + vp9_loopfilter_intrin_avx2.c \ + vp9_subpixel_8t_intrin_avx2.c \ + vp9_dct_avx2.c \ + vp9_error_intrin_avx2.c \ + vp9_subpel_variance_impl_intrin_avx2.c \ + vp9_variance_avx2.c \ + sad4d_avx2.c \ + sad_avx2.c \ + variance_avx2.c \ + variance_impl_avx2.c \ $(NULL) VPX_ASM_ENC_OFFSETS_SRCS = \ diff --git a/media/libvpx/update.py b/media/libvpx/update.py index 917cfb788..85eed5872 100644 --- a/media/libvpx/update.py +++ b/media/libvpx/update.py @@ -398,14 +398,13 @@ def prepare_upstream(prefix, commit=None): configure = ['../../configure', '--target=%s' % target, '--disable-examples', '--disable-install-docs', '--enable-multi-res-encoding', - '--size-limit=4000x3000' + '--size-limit=8192x4608' ] if 'darwin9' in target: configure += ['--enable-pic'] if 'linux' in target: configure += ['--enable-pic'] - configure += ['--disable-avx2'] # x86inc.asm is not compatible with pic 32bit builds if target == 'x86-linux-gcc': configure += ['--disable-use-x86inc'] @@ -593,6 +592,22 @@ def apply_patches(): # Cherry pick https://chromium-review.googlesource.com/#/c/276889/ # to fix crash on 32bit os.system("patch -p1 < vp9_filter_restore_aligment.patch") + # Patch win32 vpx_once. + os.system("patch -p3 < vpx_once.patch") + # Bug 1224363 - Clamp seg_lvl also in abs-value mode. + os.system("patch -p3 < clamp_abs_lvl_seg.patch") + # Bug 1224361 - Clamp QIndex also in abs-value mode. 
+ os.system("patch -p3 < clamp-abs-QIndex.patch") + # Bug 1233983 - Make libvpx build with clang-cl + os.system("patch -p3 < clang-cl.patch") + # Bug 1224371 - Cast uint8_t to uint32_t before shift + os.system("patch -p3 < cast-char-to-uint-before-shift.patch") + # Bug 1237848 - Check lookahead ctx + os.system("patch -p3 < 1237848-check-lookahead-ctx.patch") + # Bug 1263384 - Check input frame resolution + os.system("patch -p3 < input_frame_validation.patch") + # Bug 1315288 - Check input frame resolution for vp9 + os.system("patch -p3 < input_frame_validation_vp9.patch") def update_readme(commit): with open('README_MOZILLA') as f: diff --git a/media/libvpx/vp8/common/generic/systemdependent.c b/media/libvpx/vp8/common/generic/systemdependent.c index 4393ced48..8ee7e0230 100644 --- a/media/libvpx/vp8/common/generic/systemdependent.c +++ b/media/libvpx/vp8/common/generic/systemdependent.c @@ -24,6 +24,7 @@ #include #elif defined(_WIN32) #include +#include typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); #elif defined(__OS2__) #define INCL_DOS diff --git a/media/libvpx/vp8/common/rtcd.c b/media/libvpx/vp8/common/rtcd.c index ab0e9b47f..98c2ecd74 100644 --- a/media/libvpx/vp8/common/rtcd.c +++ b/media/libvpx/vp8/common/rtcd.c @@ -11,6 +11,9 @@ #define RTCD_C #include "./vp8_rtcd.h" #include "vpx_ports/vpx_once.h" +#ifdef _MSC_VER +#include +#endif void vp8_rtcd() diff --git a/media/libvpx/vp8/decoder/decodeframe.c b/media/libvpx/vp8/decoder/decodeframe.c index 2dc21ae80..4d247cbf8 100644 --- a/media/libvpx/vp8/decoder/decodeframe.c +++ b/media/libvpx/vp8/decoder/decodeframe.c @@ -71,9 +71,8 @@ void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) /* Delta Value */ else - { QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - } + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? 
QIndex : MAXQ) : 0; /* Clamp to valid range */ } else diff --git a/media/libvpx/vp8/decoder/threading.c b/media/libvpx/vp8/decoder/threading.c index 6801532f1..a76672fa8 100644 --- a/media/libvpx/vp8/decoder/threading.c +++ b/media/libvpx/vp8/decoder/threading.c @@ -28,6 +28,9 @@ #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif +#ifdef _MSC_VER +#include +#endif #define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) #define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ diff --git a/media/libvpx/vp8/encoder/encodeframe.c b/media/libvpx/vp8/encoder/encodeframe.c index d381d8ddf..5e84fb491 100644 --- a/media/libvpx/vp8/encoder/encodeframe.c +++ b/media/libvpx/vp8/encoder/encodeframe.c @@ -34,6 +34,9 @@ #include "bitstream.h" #endif #include "encodeframe.h" +#ifdef _MSC_VER +#include +#endif extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, diff --git a/media/libvpx/vp8/encoder/ethreading.c b/media/libvpx/vp8/encoder/ethreading.c index 4e234ccd5..519ae73b4 100644 --- a/media/libvpx/vp8/encoder/ethreading.c +++ b/media/libvpx/vp8/encoder/ethreading.c @@ -14,6 +14,9 @@ #include "vp8/common/extend.h" #include "bitstream.h" #include "encodeframe.h" +#ifdef _MSC_VER +#include +#endif #if CONFIG_MULTITHREAD diff --git a/media/libvpx/vp8/encoder/lookahead.c b/media/libvpx/vp8/encoder/lookahead.c index ce2ce08c1..662338574 100644 --- a/media/libvpx/vp8/encoder/lookahead.c +++ b/media/libvpx/vp8/encoder/lookahead.c @@ -181,6 +181,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, { struct lookahead_entry* buf = NULL; + assert(ctx != NULL); if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1)) { buf = pop(ctx, &ctx->read_idx); diff --git a/media/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/vp8/vp8_cx_iface.c index 802cb2e8a..5674176f1 100644 --- a/media/libvpx/vp8/vp8_cx_iface.c +++ b/media/libvpx/vp8/vp8_cx_iface.c @@ -922,7 +922,7 @@ static vpx_codec_err_t 
vp8e_encode(vpx_codec_alg_priv_t *ctx, res = image2yuvconfig(img, &sd); if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) { - /* from vp8_encoder.h for g_w/g_h: + /* from vpx_encoder.h for g_w/g_h: "Note that the frames passed as input to the encoder must have this resolution" */ ctx->base.err_detail = "Invalid input frame resolution"; diff --git a/media/libvpx/vp9/decoder/vp9_decoder.c b/media/libvpx/vp9/decoder/vp9_decoder.c index 7991a39e6..cc60c596f 100644 --- a/media/libvpx/vp9/decoder/vp9_decoder.c +++ b/media/libvpx/vp9/decoder/vp9_decoder.c @@ -499,7 +499,7 @@ vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, uint32_t this_sz = 0; for (j = 0; j < mag; ++j) - this_sz |= (*x++) << (j * 8); + this_sz |= (uint32_t)(*x++) << (j * 8); sizes[i] = this_sz; } *count = frames; diff --git a/media/libvpx/vp9/encoder/vp9_lookahead.c b/media/libvpx/vp9/encoder/vp9_lookahead.c index b8e2ca88c..fd32a16b4 100644 --- a/media/libvpx/vp9/encoder/vp9_lookahead.c +++ b/media/libvpx/vp9/encoder/vp9_lookahead.c @@ -207,7 +207,7 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { + if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } diff --git a/media/libvpx/vp9/vp9_cx_iface.c b/media/libvpx/vp9/vp9_cx_iface.c index 9462be9fa..e0b8718fb 100644 --- a/media/libvpx/vp9/vp9_cx_iface.c +++ b/media/libvpx/vp9/vp9_cx_iface.c @@ -994,11 +994,19 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, if (img != NULL) { res = image2yuvconfig(img, &sd); - // Store the original flags in to the frame buffer. Will extract the - // key frame flag when we actually encode this frame. 
- if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, - &sd, dst_time_stamp, dst_end_time_stamp)) { - res = update_error_state(ctx, &cpi->common.error); + if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) { + /* from vpx_encoder.h for g_w/g_h: + "Note that the frames passed as input to the encoder must have this resolution" + */ + ctx->base.err_detail = "Invalid input frame resolution"; + res = VPX_CODEC_INVALID_PARAM; + } else { + // Store the original flags in to the frame buffer. Will extract the + // key frame flag when we actually encode this frame. + if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, + &sd, dst_time_stamp, dst_end_time_stamp)) { + res = update_error_state(ctx, &cpi->common.error); + } } ctx->next_frame_flags = 0; } diff --git a/media/libvpx/vp9_rtcd_x86-linux-gcc.h b/media/libvpx/vp9_rtcd_x86-linux-gcc.h index 8b7af70d7..8c4aae55b 100644 --- a/media/libvpx/vp9_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86-linux-gcc.h @@ -38,7 +38,8 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *, int p); RTCD_EXTERN unsigned int (*vp9_avg_8x8)(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -#define vp9_block_error vp9_block_error_c +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ -47,6 +48,7 @@ RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *d void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int 
y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -67,11 +69,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, 
ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -213,6 +217,7 @@ RTCD_EXTERN void (*vp9_fdct16x16_1)(const int16_t *input, tran_low_t *output, in void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); @@ -221,6 +226,7 @@ RTCD_EXTERN void (*vp9_fdct32x32_1)(const int16_t *input, tran_low_t *output, in void 
vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -359,6 +365,7 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -436,7 +443,8 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int sourc #define vp9_sub_pixel_avg_variance32x16 vp9_sub_pixel_avg_variance32x16_c unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); -#define vp9_sub_pixel_avg_variance32x32 vp9_sub_pixel_avg_variance32x32_c +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance32x64 vp9_sub_pixel_avg_variance32x64_c @@ -451,7 +459,8 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int sourc #define vp9_sub_pixel_avg_variance64x32 vp9_sub_pixel_avg_variance64x32_c unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); -#define vp9_sub_pixel_avg_variance64x64 vp9_sub_pixel_avg_variance64x64_c +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #define vp9_sub_pixel_avg_variance8x16 vp9_sub_pixel_avg_variance8x16_c @@ -475,7 +484,8 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_st #define vp9_sub_pixel_variance32x16 vp9_sub_pixel_variance32x16_c unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_sub_pixel_variance32x32 vp9_sub_pixel_variance32x32_c +unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int 
source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); #define vp9_sub_pixel_variance32x64 vp9_sub_pixel_variance32x64_c @@ -490,7 +500,8 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_st #define vp9_sub_pixel_variance64x32 vp9_sub_pixel_variance64x32_c unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_sub_pixel_variance64x64 vp9_sub_pixel_variance64x64_c +unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); #define vp9_sub_pixel_variance8x16 vp9_sub_pixel_variance8x16_c @@ -550,11 +561,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_avg_4x4 = vp9_avg_4x4_sse2; vp9_avg_8x8 = vp9_avg_8x8_c; if (flags & HAS_SSE2) vp9_avg_8x8 = vp9_avg_8x8_sse2; + vp9_block_error = vp9_block_error_c; + if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; vp9_block_error_fp = vp9_block_error_fp_c; if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) 
vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; @@ -567,19 +581,23 @@ static void setup_rtcd_internal(void) vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; vp9_fdct16x16_1 = vp9_fdct16x16_1_c; if (flags & HAS_SSE2) vp9_fdct16x16_1 = vp9_fdct16x16_1_sse2; vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; vp9_fdct32x32_1 = vp9_fdct32x32_1_c; if (flags & HAS_SSE2) vp9_fdct32x32_1 = vp9_fdct32x32_1_sse2; vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; vp9_fdct4x4_1 = vp9_fdct4x4_1_c; @@ -638,6 +656,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_int_pro_row = vp9_int_pro_row_sse2; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if 
(flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; @@ -666,6 +685,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2; vp9_satd = vp9_satd_c; if (flags & HAS_SSE2) vp9_satd = vp9_satd_sse2; + vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; + vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; + vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; + vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; vp9_vector_var = vp9_vector_var_c; diff --git a/media/libvpx/vp9_rtcd_x86-win32-vs12.h b/media/libvpx/vp9_rtcd_x86-win32-vs12.h index 31cf78174..0725c15ba 100644 --- a/media/libvpx/vp9_rtcd_x86-win32-vs12.h +++ b/media/libvpx/vp9_rtcd_x86-win32-vs12.h @@ -39,6 +39,7 @@ RTCD_EXTERN unsigned int (*vp9_avg_8x8)(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ 
-48,6 +49,7 @@ RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff, const int16_t *d void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -68,11 +70,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); 
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -247,6 +251,7 @@ RTCD_EXTERN void (*vp9_fdct16x16_1)(const int16_t *input, tran_low_t *output, in void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void 
(*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); @@ -255,6 +260,7 @@ RTCD_EXTERN void (*vp9_fdct32x32_1)(const int16_t *input, tran_low_t *output, in void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -398,6 +404,7 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -485,6 +492,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int 
vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -510,6 +518,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int 
vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -550,6 +559,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -575,6 +585,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -651,11 +662,13 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_avg_8x8 = vp9_avg_8x8_sse2; vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; + if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; vp9_block_error_fp = vp9_block_error_fp_c; if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2; vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; @@ -668,9 +681,11 @@ static void setup_rtcd_internal(void) vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2; vp9_convolve_copy = 
vp9_convolve_copy_c; @@ -743,10 +758,12 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_fdct16x16_1 = vp9_fdct16x16_1_sse2; vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; vp9_fdct32x32_1 = vp9_fdct32x32_1_c; if (flags & HAS_SSE2) vp9_fdct32x32_1 = vp9_fdct32x32_1_sse2; vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; vp9_fdct4x4_1 = vp9_fdct4x4_1_c; @@ -815,6 +832,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_int_pro_row = vp9_int_pro_row_sse2; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; @@ -858,6 +876,7 @@ static void setup_rtcd_internal(void) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; @@ -873,6 +892,7 @@ static void setup_rtcd_internal(void) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; if (flags & HAS_SSE2) 
vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; @@ -897,6 +917,7 @@ static void setup_rtcd_internal(void) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; @@ -912,6 +933,7 @@ static void setup_rtcd_internal(void) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; diff --git a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h index 34e7f3160..8d75771cf 100644 --- a/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vp9_rtcd_x86_64-linux-gcc.h @@ -39,7 +39,8 @@ unsigned int 
vp9_avg_8x8_sse2(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -#define vp9_block_error vp9_block_error_sse2 +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ -48,6 +49,7 @@ int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, in void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const 
int16_t *filter_y, int y_step_q4, int w, int h); @@ -68,11 +70,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void 
(*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -247,7 +251,8 @@ void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); -#define vp9_fdct32x32 vp9_fdct32x32_sse2 +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -255,7 +260,8 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); -#define vp9_fdct32x32_rd vp9_fdct32x32_rd_sse2 +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -402,7 +408,8 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const 
uint8_t *thresh, int count); -#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_sse2 +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -493,6 +500,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -518,6 +526,7 
@@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -558,6 +567,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -583,6 +593,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -654,8 +665,11 @@ static void setup_rtcd_internal(void) (void)flags; + vp9_block_error = vp9_block_error_sse2; + if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2; vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = 
vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; @@ -664,8 +678,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; @@ -696,6 +712,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; + vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; + vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct8x8 = vp9_fdct8x8_sse2; if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3; vp9_fdct8x8_quant = vp9_fdct8x8_quant_sse2; @@ -717,6 +737,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; + vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = 
vp9_quantize_b_32x32_c; @@ -735,6 +757,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; @@ -745,6 +768,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; @@ -761,6 +785,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; @@ -771,6 +796,7 @@ static void setup_rtcd_internal(void) if (flags & 
HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; diff --git a/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h index b2520c432..8d75771cf 100644 --- a/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h +++ b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h @@ -39,6 +39,7 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *, int p); int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); @@ -48,6 +49,7 @@ int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, in void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const 
int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -68,11 +70,13 @@ RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_str void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int 
y_step_q4, int w, int h); void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -247,6 +251,7 @@ void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); @@ -255,6 +260,7 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); void 
vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -402,6 +408,7 @@ void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -493,6 +500,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); 
unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -518,6 +526,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_p unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); @@ -558,6 +567,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -583,6 +593,7 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -655,8 +666,10 @@ static void setup_rtcd_internal(void) (void)flags; vp9_block_error = vp9_block_error_sse2; + if (flags & HAS_AVX2) 
vp9_block_error = vp9_block_error_avx2; vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; + if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; @@ -665,8 +678,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; @@ -698,7 +713,9 @@ static void setup_rtcd_internal(void) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; vp9_fdct32x32 = vp9_fdct32x32_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; vp9_fdct8x8 = vp9_fdct8x8_sse2; if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3; vp9_fdct8x8_quant = vp9_fdct8x8_quant_sse2; @@ -721,6 +738,7 @@ static void setup_rtcd_internal(void) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; + if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; 
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -739,6 +757,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; @@ -749,6 +768,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; @@ -765,6 +785,7 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; @@ -775,6 +796,7 @@ static void 
setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; + if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; diff --git a/media/libvpx/vpx_config_armv7-android-gcc.h b/media/libvpx/vpx_config_armv7-android-gcc.h index ff7e42f71..b2ef8c207 100644 --- a/media/libvpx/vpx_config_armv7-android-gcc.h +++ b/media/libvpx/vpx_config_armv7-android-gcc.h @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_generic-gnu.h b/media/libvpx/vpx_config_generic-gnu.h index 724edd6ed..c62e7b057 100644 --- a/media/libvpx/vpx_config_generic-gnu.h +++ b/media/libvpx/vpx_config_generic-gnu.h @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86-darwin9-gcc.h b/media/libvpx/vpx_config_x86-darwin9-gcc.h index 7cca4b699..51a41512a 100644 --- a/media/libvpx/vpx_config_x86-darwin9-gcc.h +++ b/media/libvpx/vpx_config_x86-darwin9-gcc.h @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define 
DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86-linux-gcc.asm b/media/libvpx/vpx_config_x86-linux-gcc.asm index 63b48bb17..fe52e0682 100644 --- a/media/libvpx/vpx_config_x86-linux-gcc.asm +++ b/media/libvpx/vpx_config_x86-linux-gcc.asm @@ -17,7 +17,7 @@ HAVE_SSE3 equ 1 HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 -HAVE_AVX2 equ 0 +HAVE_AVX2 equ 1 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 HAVE_PTHREAD_H equ 1 diff --git a/media/libvpx/vpx_config_x86-linux-gcc.h b/media/libvpx/vpx_config_x86-linux-gcc.h index edcf0ee9f..a987397f0 100644 --- a/media/libvpx/vpx_config_x86-linux-gcc.h +++ b/media/libvpx/vpx_config_x86-linux-gcc.h @@ -29,7 +29,7 @@ #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 #define HAVE_AVX 1 -#define HAVE_AVX2 0 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 #define HAVE_PTHREAD_H 1 @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86-win32-gcc.h b/media/libvpx/vpx_config_x86-win32-gcc.h index abc34be9d..7654e5fee 100644 --- a/media/libvpx/vpx_config_x86-win32-gcc.h +++ b/media/libvpx/vpx_config_x86-win32-gcc.h @@ -93,6 +93,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86-win32-vs12.h b/media/libvpx/vpx_config_x86-win32-vs12.h index a86a54ede..42525a303 100644 --- a/media/libvpx/vpx_config_x86-win32-vs12.h +++ b/media/libvpx/vpx_config_x86-win32-vs12.h @@ -28,8 +28,8 @@ #define HAVE_SSE3 1 #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 -#define HAVE_AVX 0 -#define HAVE_AVX2 0 +#define 
HAVE_AVX 1 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 #define HAVE_PTHREAD_H 0 @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86_64-darwin9-gcc.h b/media/libvpx/vpx_config_x86_64-darwin9-gcc.h index d04556d45..dd986e04a 100644 --- a/media/libvpx/vpx_config_x86_64-darwin9-gcc.h +++ b/media/libvpx/vpx_config_x86_64-darwin9-gcc.h @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86_64-linux-gcc.asm b/media/libvpx/vpx_config_x86_64-linux-gcc.asm index 481d00df0..7c808c83f 100644 --- a/media/libvpx/vpx_config_x86_64-linux-gcc.asm +++ b/media/libvpx/vpx_config_x86_64-linux-gcc.asm @@ -17,7 +17,7 @@ HAVE_SSE3 equ 1 HAVE_SSSE3 equ 1 HAVE_SSE4_1 equ 1 HAVE_AVX equ 1 -HAVE_AVX2 equ 0 +HAVE_AVX2 equ 1 HAVE_VPX_PORTS equ 1 HAVE_STDINT_H equ 1 HAVE_PTHREAD_H equ 1 diff --git a/media/libvpx/vpx_config_x86_64-linux-gcc.h b/media/libvpx/vpx_config_x86_64-linux-gcc.h index dcd1ad82a..dd986e04a 100644 --- a/media/libvpx/vpx_config_x86_64-linux-gcc.h +++ b/media/libvpx/vpx_config_x86_64-linux-gcc.h @@ -29,7 +29,7 @@ #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 #define HAVE_AVX 1 -#define HAVE_AVX2 0 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 #define HAVE_PTHREAD_H 1 @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* 
VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.h b/media/libvpx/vpx_config_x86_64-win64-gcc.h index cf8a66268..f9bb5c35a 100644 --- a/media/libvpx/vpx_config_x86_64-win64-gcc.h +++ b/media/libvpx/vpx_config_x86_64-win64-gcc.h @@ -93,6 +93,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_config_x86_64-win64-vs12.h b/media/libvpx/vpx_config_x86_64-win64-vs12.h index b0929ebe7..65e45f5ba 100644 --- a/media/libvpx/vpx_config_x86_64-win64-vs12.h +++ b/media/libvpx/vpx_config_x86_64-win64-vs12.h @@ -28,8 +28,8 @@ #define HAVE_SSE3 1 #define HAVE_SSSE3 1 #define HAVE_SSE4_1 1 -#define HAVE_AVX 0 -#define HAVE_AVX2 0 +#define HAVE_AVX 1 +#define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 #define HAVE_PTHREAD_H 0 @@ -92,6 +92,6 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 -#define DECODE_WIDTH_LIMIT 4000 -#define DECODE_HEIGHT_LIMIT 3000 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 #endif /* VPX_CONFIG_H */ diff --git a/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c b/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c index 5fe27b614..d2476039d 100644 --- a/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c +++ b/media/libvpx/vpx_dsp/vpx_dsp_rtcd.c @@ -11,6 +11,9 @@ #define RTCD_C #include "./vpx_dsp_rtcd.h" #include "vpx_ports/vpx_once.h" +#ifdef _MSC_VER +#include +#endif void vpx_dsp_rtcd() { once(setup_rtcd_internal); diff --git a/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h index 5cb892577..d69c28c1a 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h +++ b/media/libvpx/vpx_dsp_rtcd_x86-linux-gcc.h @@ -23,6 +23,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void 
vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); @@ -41,6 +42,7 @@ RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -103,34 +105,41 @@ void vpx_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x16 vpx_sad32x16_c +unsigned int 
vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x16_avg vpx_sad32x16_avg_c +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad32x16x4d vpx_sad32x16x4d_c unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x32 vpx_sad32x32_c +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x32_avg vpx_sad32x32_avg_c +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x3 vpx_sad32x32x3_c void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * 
const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad32x32x4d vpx_sad32x32x4d_c +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x8 vpx_sad32x32x8_c unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x64 vpx_sad32x64_c +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x64_avg vpx_sad32x64_avg_c +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad32x64x4d vpx_sad32x64x4d_c @@ -166,25 +175,30 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p #define vpx_sad4x8x8 vpx_sad4x8x8_c unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x32 vpx_sad64x32_c +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); 
+RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x32_avg vpx_sad64x32_avg_c +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); #define vpx_sad64x32x4d vpx_sad64x32x4d_c unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x64 vpx_sad64x64_c +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x64_avg vpx_sad64x64_avg_c +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x3 vpx_sad64x64x3_c void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad64x64x4d vpx_sad64x64x4d_c +void 
vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x8 vpx_sad64x64x8_c @@ -240,6 +254,7 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -253,10 +268,12 @@ RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_ unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -274,10 +291,12 @@ RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_s unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int 
vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -306,6 +325,7 @@ static void setup_rtcd_internal(void) vpx_get16x16var = vpx_get16x16var_c; if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; vpx_get8x8var = vpx_get8x8var_c; if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; @@ -315,6 +335,7 @@ static void setup_rtcd_internal(void) vpx_mse16x16 = vpx_mse16x16_c; if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_mse16x8 = vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; vpx_mse8x16 = vpx_mse8x16_c; @@ -335,12 +356,36 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; vpx_sad16x8x8 = vpx_sad16x8x8_c; if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_c; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_c; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_c; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_c; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_c; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_c; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_c; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; vpx_sad4x4 = vpx_sad4x4_c; if (flags & HAS_MMX) vpx_sad4x4 = vpx_sad4x4_mmx; vpx_sad4x4x3 = vpx_sad4x4x3_c; if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; vpx_sad4x4x8 = vpx_sad4x4x8_c; if (flags & HAS_SSE4_1) 
vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_c; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_c; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_c; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_c; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_c; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16 = vpx_sad8x16_c; if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; vpx_sad8x16x3 = vpx_sad8x16x3_c; @@ -356,6 +401,7 @@ static void setup_rtcd_internal(void) vpx_variance16x16 = vpx_variance16x16_c; if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; vpx_variance16x8 = vpx_variance16x8_c; @@ -363,8 +409,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_c; if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; vpx_variance4x4 = vpx_variance4x4_c; @@ -374,8 +422,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; vpx_variance64x32 = vpx_variance64x32_c; if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = 
vpx_variance64x32_avx2; vpx_variance64x64 = vpx_variance64x64_c; if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; vpx_variance8x16 = vpx_variance8x16_c; if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; diff --git a/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h b/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h index b2814b03f..32ee77e25 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h +++ b/media/libvpx/vpx_dsp_rtcd_x86-win32-vs12.h @@ -23,6 +23,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); @@ -41,6 +42,7 @@ RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int 
*sse); RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -113,10 +115,12 @@ RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -125,10 +129,12 @@ RTCD_EXTERN void (*vpx_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, cons unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); 
RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -136,6 +142,7 @@ void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -143,10 +150,12 @@ void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int 
src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -191,10 +200,12 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -203,10 +214,12 @@ RTCD_EXTERN void (*vpx_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, cons unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -214,6 +227,7 @@ void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad64x64x4d)(const 
uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -279,6 +293,7 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -292,10 +307,12 @@ RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_ unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int 
vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -313,10 +330,12 @@ RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_s unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -345,6 +364,7 @@ static void setup_rtcd_internal(void) vpx_get16x16var = 
vpx_get16x16var_c; if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; vpx_get8x8var = vpx_get8x8var_c; if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; @@ -354,6 +374,7 @@ static void setup_rtcd_internal(void) vpx_mse16x16 = vpx_mse16x16_c; if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_mse16x8 = vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; vpx_mse8x16 = vpx_mse8x16_c; @@ -392,20 +413,27 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; vpx_sad32x16 = vpx_sad32x16_c; if (flags & HAS_SSE2) vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; vpx_sad32x16_avg = vpx_sad32x16_avg_c; if (flags & HAS_SSE2) vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; vpx_sad32x16x4d = vpx_sad32x16x4d_c; if (flags & HAS_SSE2) vpx_sad32x16x4d = vpx_sad32x16x4d_sse2; vpx_sad32x32 = vpx_sad32x32_c; if (flags & HAS_SSE2) vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; vpx_sad32x32_avg = vpx_sad32x32_avg_c; if (flags & HAS_SSE2) vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; vpx_sad32x32x4d = vpx_sad32x32x4d_c; if (flags & HAS_SSE2) vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; vpx_sad32x64 = vpx_sad32x64_c; if (flags & HAS_SSE2) vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; vpx_sad32x64_avg = vpx_sad32x64_avg_c; if (flags & HAS_SSE2) vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; 
vpx_sad32x64x4d = vpx_sad32x64x4d_c; if (flags & HAS_SSE2) vpx_sad32x64x4d = vpx_sad32x64x4d_sse2; vpx_sad4x4 = vpx_sad4x4_c; @@ -427,16 +455,21 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vpx_sad4x8x4d = vpx_sad4x8x4d_sse; vpx_sad64x32 = vpx_sad64x32_c; if (flags & HAS_SSE2) vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; vpx_sad64x32_avg = vpx_sad64x32_avg_c; if (flags & HAS_SSE2) vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; vpx_sad64x32x4d = vpx_sad64x32x4d_c; if (flags & HAS_SSE2) vpx_sad64x32x4d = vpx_sad64x32x4d_sse2; vpx_sad64x64 = vpx_sad64x64_c; if (flags & HAS_SSE2) vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; vpx_sad64x64_avg = vpx_sad64x64_avg_c; if (flags & HAS_SSE2) vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; vpx_sad64x64x4d = vpx_sad64x64x4d_c; if (flags & HAS_SSE2) vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16 = vpx_sad8x16_c; if (flags & HAS_MMX) vpx_sad8x16 = vpx_sad8x16_mmx; if (flags & HAS_SSE2) vpx_sad8x16 = vpx_sad8x16_sse2; @@ -468,6 +501,7 @@ static void setup_rtcd_internal(void) vpx_variance16x16 = vpx_variance16x16_c; if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; vpx_variance16x8 = vpx_variance16x8_c; @@ -475,8 +509,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = 
vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_c; if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; vpx_variance4x4 = vpx_variance4x4_c; @@ -486,8 +522,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; vpx_variance64x32 = vpx_variance64x32_c; if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; vpx_variance64x64 = vpx_variance64x64_c; if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; vpx_variance8x16 = vpx_variance8x16_c; if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h index 227fe0d69..d93c56eb7 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-linux-gcc.h @@ -23,7 +23,8 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vpx_get16x16var vpx_get16x16var_sse2 +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int 
source_stride, const unsigned char *ref_ptr, int ref_stride); #define vpx_get4x4sse_cs vpx_get4x4sse_cs_c @@ -41,7 +42,8 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vpx_mse16x16 vpx_mse16x16_sse2 +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -113,11 +115,13 @@ RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x16 vpx_sad32x16_sse2 +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x16_avg vpx_sad32x16_avg_sse2 +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -125,29 +129,34 @@ void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x32 vpx_sad32x32_sse2 +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x32_avg vpx_sad32x32_avg_sse2 +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, uint32_t *sad_array); #define vpx_sad32x32x3 vpx_sad32x32x3_c void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad32x32x4d vpx_sad32x32x4d_sse2 +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad32x32x8 vpx_sad32x32x8_c unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad32x64 vpx_sad32x64_sse2 +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad32x64_avg vpx_sad32x64_avg_sse2 +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const 
uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -191,11 +200,13 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x32 vpx_sad64x32_sse2 +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x32_avg vpx_sad64x32_avg_sse2 +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -203,18 +214,21 @@ void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_sse2(const 
uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); -#define vpx_sad64x64 vpx_sad64x64_sse2 +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -#define vpx_sad64x64_avg vpx_sad64x64_avg_sse2 +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x3 vpx_sad64x64x3_c void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); -#define vpx_sad64x64x4d vpx_sad64x64x4d_sse2 +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad64x64x8 vpx_sad64x64x8_c @@ -279,7 +293,8 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int 
src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance16x16 vpx_variance16x16_sse2 +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -292,11 +307,13 @@ unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, co unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance32x16 vpx_variance32x16_sse2 +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance32x32 vpx_variance32x32_sse2 +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -313,11 +330,13 @@ unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, con unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance64x32 vpx_variance64x32_sse2 +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vpx_variance64x64 vpx_variance64x64_sse2 +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -343,6 +362,10 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -353,10 +376,34 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vpx_sad16x8x3 = vpx_sad16x8x3_ssse3; vpx_sad16x8x8 = vpx_sad16x8x8_c; if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; + vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; + vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; + vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; + vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; + vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; + vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; + vpx_sad32x64_avg = vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; vpx_sad4x4x3 = vpx_sad4x4x3_c; if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; vpx_sad4x4x8 = vpx_sad4x4x8_c; if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; + vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; + vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = 
vpx_sad64x32_avg_avx2; + vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; + vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; + vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16x3 = vpx_sad8x16x3_c; if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; vpx_sad8x16x8 = vpx_sad8x16x8_c; @@ -365,6 +412,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif diff --git a/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h index fef9bf89e..d93c56eb7 100644 --- a/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h +++ b/media/libvpx/vpx_dsp_rtcd_x86_64-win64-vs12.h @@ -23,6 +23,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int 
*sum); RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); @@ -41,6 +42,7 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *); unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); @@ -113,10 +115,12 @@ RTCD_EXTERN void (*vpx_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const unsigned int vpx_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred); +unsigned int vpx_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -125,10 +129,12 @@ void vpx_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -136,6 +142,7 @@ void vpx_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref void vpx_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x4d_sse2(const uint8_t *src_ptr, 
int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -143,10 +150,12 @@ void vpx_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref unsigned int vpx_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -191,10 +200,12 @@ void vpx_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p unsigned int vpx_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int 
vpx_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); @@ -203,10 +214,12 @@ void vpx_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); unsigned int vpx_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int vpx_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *second_pred); RTCD_EXTERN unsigned int (*vpx_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -214,6 +227,7 @@ void vpx_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref void vpx_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); void vpx_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); @@ -279,6 +293,7 @@ RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const u unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); @@ -292,10 +307,12 @@ unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, co unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -313,10 +330,12 @@ unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, con unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); @@ -344,7 +363,9 @@ static void setup_rtcd_internal(void) (void)flags; vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -356,21 +377,33 @@ static void setup_rtcd_internal(void) vpx_sad16x8x8 = vpx_sad16x8x8_c; if (flags & HAS_SSE4_1) vpx_sad16x8x8 = vpx_sad16x8x8_sse4_1; vpx_sad32x16 = vpx_sad32x16_sse2; + if (flags & HAS_AVX2) vpx_sad32x16 = vpx_sad32x16_avx2; vpx_sad32x16_avg = vpx_sad32x16_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x16_avg = vpx_sad32x16_avg_avx2; vpx_sad32x32 = vpx_sad32x32_sse2; + if (flags & HAS_AVX2) vpx_sad32x32 = vpx_sad32x32_avx2; vpx_sad32x32_avg = vpx_sad32x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x32_avg = vpx_sad32x32_avg_avx2; vpx_sad32x32x4d = vpx_sad32x32x4d_sse2; + if (flags & HAS_AVX2) vpx_sad32x32x4d = vpx_sad32x32x4d_avx2; vpx_sad32x64 = vpx_sad32x64_sse2; + if (flags & HAS_AVX2) vpx_sad32x64 = vpx_sad32x64_avx2; vpx_sad32x64_avg = 
vpx_sad32x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad32x64_avg = vpx_sad32x64_avg_avx2; vpx_sad4x4x3 = vpx_sad4x4x3_c; if (flags & HAS_SSE3) vpx_sad4x4x3 = vpx_sad4x4x3_sse3; vpx_sad4x4x8 = vpx_sad4x4x8_c; if (flags & HAS_SSE4_1) vpx_sad4x4x8 = vpx_sad4x4x8_sse4_1; vpx_sad64x32 = vpx_sad64x32_sse2; + if (flags & HAS_AVX2) vpx_sad64x32 = vpx_sad64x32_avx2; vpx_sad64x32_avg = vpx_sad64x32_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x32_avg = vpx_sad64x32_avg_avx2; vpx_sad64x64 = vpx_sad64x64_sse2; + if (flags & HAS_AVX2) vpx_sad64x64 = vpx_sad64x64_avx2; vpx_sad64x64_avg = vpx_sad64x64_avg_sse2; + if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2; vpx_sad64x64x4d = vpx_sad64x64x4d_sse2; + if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2; vpx_sad8x16x3 = vpx_sad8x16x3_c; if (flags & HAS_SSE3) vpx_sad8x16x3 = vpx_sad8x16x3_sse3; vpx_sad8x16x8 = vpx_sad8x16x8_c; @@ -380,10 +413,15 @@ static void setup_rtcd_internal(void) vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif diff --git a/media/libvpx/vpx_ports/vpx_once.h b/media/libvpx/vpx_ports/vpx_once.h index f1df39434..da04db459 100644 --- a/media/libvpx/vpx_ports/vpx_once.h +++ b/media/libvpx/vpx_ports/vpx_once.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -13,63 +13,83 @@ #include "vpx_config.h" +/* Implement a function wrapper to guarantee initialization + * thread-safety for library singletons. + * + * NOTE: These functions use static locks, and can only be + * used with one common argument per compilation unit. So + * + * file1.c: + * vpx_once(foo); + * ... + * vpx_once(foo); + * + * file2.c: + * vpx_once(bar); + * + * will ensure foo() and bar() are each called only once, but in + * + * file1.c: + * vpx_once(foo); + * vpx_once(bar); + * + * bar() will never be called because the lock is used up + * by the call to foo(). + */ + #if CONFIG_MULTITHREAD && defined(_WIN32) #include #include +/* Declare a per-compilation-unit state variable to track the progress + * of calling func() only once. This must be at global scope because + * local initializers are not thread-safe in MSVC prior to Visual + * Studio 2015. + * + * As a static, once_state will be zero-initialized at program start. + */ +static LONG once_state; static void once(void (*func)(void)) { - static CRITICAL_SECTION *lock; - static LONG waiters; - static int done; - void *lock_ptr = &lock; - - /* If the initialization is complete, return early. This isn't just an - * optimization, it prevents races on the destruction of the global - * lock. + /* Try to advance once_state from its initial value of 0 to 1. + * Only one thread can succeed in doing so. */ - if(done) - return; - - InterlockedIncrement(&waiters); - - /* Get a lock. We create one and try to make it the one-true-lock, - * throwing it away if we lost the race.
- */ - - { - /* Scope to protect access to new_lock */ - CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(new_lock); - if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) - { - DeleteCriticalSection(new_lock); - free(new_lock); - } - } - - /* At this point, we have a lock that can be synchronized on. We don't - * care which thread actually performed the allocation. - */ - - EnterCriticalSection(lock); - - if (!done) - { + if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { + /* We're the winning thread, having set once_state to 1. + * Call our function. */ func(); - done = 1; + /* Now advance once_state to 2, unblocking any other threads. */ + InterlockedIncrement(&once_state); + return; } - LeaveCriticalSection(lock); - - /* Last one out should free resources. The destructed objects are - * protected by checking if(done) above. + /* We weren't the winning thread, but we want to block on + * the state variable so we don't return before func() + * has finished executing elsewhere. + * + * Try to advance once_state from 2 to 2, which is only possible + * after the winning thread advances it from 1 to 2. */ - if(!InterlockedDecrement(&waiters)) - { - DeleteCriticalSection(lock); - free(lock); - lock = NULL; + while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { + /* State isn't yet 2. Try again. + * + * We are used for singleton initialization functions, + * which should complete quickly. Contention will likewise + * be rare, so it's worthwhile to use a simple but cpu- + * intensive busy-wait instead of successive backoff, + * waiting on a kernel object, or another heavier-weight scheme. + * + * We can at least yield our timeslice. + */ + Sleep(0); } + + /* We've seen once_state advance to 2, so we know func() + * has been called. And we've left once_state as we found it, + * so other threads will have the same experience. + * + * It's safe to return now.
+ */ + return; } diff --git a/media/libvpx/vpx_scale/vpx_scale_rtcd.c b/media/libvpx/vpx_scale/vpx_scale_rtcd.c index bea603fd1..65532ba94 100644 --- a/media/libvpx/vpx_scale/vpx_scale_rtcd.c +++ b/media/libvpx/vpx_scale/vpx_scale_rtcd.c @@ -11,6 +11,9 @@ #define RTCD_C #include "./vpx_scale_rtcd.h" #include "vpx_ports/vpx_once.h" +#ifdef _MSC_VER +#include +#endif void vpx_scale_rtcd() {