mirror of
https://github.com/roytam1/basilisk55.git
synced 2026-05-26 15:02:46 +00:00
libwebp: update to 1.0.2
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
Contributors:
|
||||
- Alan Browning (browning at google dot com)
|
||||
- Charles Munger (clm at google dot com)
|
||||
- Christian Duvivier (cduvivier at google dot com)
|
||||
- Djordje Pesut (djordje dot pesut at imgtec dot com)
|
||||
@@ -6,9 +7,10 @@ Contributors:
|
||||
- James Zern (jzern at google dot com)
|
||||
- Jan Engelhardt (jengelh at medozas dot de)
|
||||
- Jehan (jehan at girinstud dot io)
|
||||
- Johann (johann dot koenig at duck dot com)
|
||||
- Johann Koenig (johann dot koenig at duck dot com)
|
||||
- Jovan Zelincevic (jovan dot zelincevic at imgtec dot com)
|
||||
- Jyrki Alakuijala (jyrki at google dot com)
|
||||
- Konstantin Ivlev (tomskside at gmail dot com)
|
||||
- Lode Vandevenne (lode at google dot com)
|
||||
- Lou Quillio (louquillio at google dot com)
|
||||
- Mans Rullgard (mans at mansr dot com)
|
||||
@@ -35,4 +37,6 @@ Contributors:
|
||||
- Urvang Joshi (urvang at google dot com)
|
||||
- Vikas Arora (vikasa at google dot com)
|
||||
- Vincent Rabaud (vrabaud at google dot com)
|
||||
- Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
|
||||
- Yang Zhang (yang dot zhang at arm dot com)
|
||||
- Yannis Guyon (yguyon at google dot com)
|
||||
|
||||
@@ -1,3 +1,46 @@
|
||||
- 1/14/2019: version 1.0.2
|
||||
This is a binary compatible release.
|
||||
* (Windows) unicode file support in the tools (linux and mac already had
|
||||
support, issue #398)
|
||||
* lossless encoder speedups
|
||||
* lossy encoder speedup on ARM
|
||||
* lossless multi-threaded security fix (chromium:917029)
|
||||
|
||||
- 11/2/2018: version 1.0.1
|
||||
This is a binary compatible release.
|
||||
* lossless encoder speedups
|
||||
* big-endian fix for alpha decoding (issue #393)
|
||||
* gif2webp fix for loop count=65535 transcode (issue #382)
|
||||
* further security related hardening in libwebp & libwebpmux
|
||||
(issues #383, #385, #386, #387, #388, #391)
|
||||
(oss-fuzz #9099, #9100, #9105, #9106, #9111, #9112, #9119, #9123, #9170,
|
||||
#9178, #9179, #9183, #9186, #9191, #9364, #9417, #9496, #10349,
|
||||
#10423, #10634, #10700, #10838, #10922, #11021, #11088, #11152)
|
||||
* miscellaneous bug & build fixes (issues #381, #394, #396, #397, #400)
|
||||
|
||||
- 4/2/2018: version 1.0.0
|
||||
This is a binary compatible release.
|
||||
* lossy encoder improvements to avoid chroma shifts in various circumstances
|
||||
(issues #308, #340)
|
||||
* big-endian fixes for decode, RGBA import and WebPPictureDistortion
|
||||
Tool updates:
|
||||
gifwebp, anim_diff - default duration behavior (<= 10ms) changed to match
|
||||
web browsers, transcoding tools (issue #379)
|
||||
img2webp, webpmux - allow options to be passed in via a file (issue #355)
|
||||
|
||||
- 11/24/2017: version 0.6.1
|
||||
This is a binary compatible release.
|
||||
* lossless performance and compression improvements + a new 'cruncher' mode
|
||||
(-m 6 -q 100)
|
||||
* ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
|
||||
* webp-js: emscripten/webassembly based javascript decoder
|
||||
* miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
|
||||
#363)
|
||||
Tool updates / additions:
|
||||
added webpinfo - prints file format information (issue #330)
|
||||
gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
|
||||
'-loop_compatibility' can be used for the old behavior
|
||||
|
||||
- 1/26/2017: version 0.6.0
|
||||
* lossless performance and compression improvements
|
||||
* miscellaneous performance improvements (SSE2, NEON, MSA)
|
||||
|
||||
+44
-5
@@ -4,7 +4,7 @@
|
||||
\__\__/\____/\_____/__/ ____ ___
|
||||
/ _/ / \ \ / _ \/ _/
|
||||
/ \_/ / / \ \ __/ \__
|
||||
\____/____/\_____/_____/____/v0.6.0
|
||||
\____/____/\_____/_____/____/v1.0.2
|
||||
|
||||
Description:
|
||||
============
|
||||
@@ -113,8 +113,8 @@ make install
|
||||
|
||||
CMake:
|
||||
------
|
||||
The support for CMake is minimal: it only helps you compile libwebp, cwebp and
|
||||
dwebp.
|
||||
With CMake, you can compile libwebp, cwebp, dwebp, gif2web, img2webp, webpinfo
|
||||
and the JS bindings.
|
||||
|
||||
Prerequisites:
|
||||
A compiler (e.g., gcc with autotools) and CMake.
|
||||
@@ -123,18 +123,27 @@ minimal build:
|
||||
$ sudo apt-get install build-essential cmake
|
||||
|
||||
When building from git sources, you will need to run cmake to generate the
|
||||
configure script.
|
||||
makefiles.
|
||||
|
||||
mkdir build && cd build && cmake ../
|
||||
make
|
||||
make install
|
||||
|
||||
If you also want cwebp or dwebp, you will need to enable them through CMake:
|
||||
If you also want any of the executables, you will need to enable them through
|
||||
CMake, e.g.:
|
||||
|
||||
cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../
|
||||
|
||||
or through your favorite interface (like ccmake or cmake-qt-gui).
|
||||
|
||||
Use option -DWEBP_UNICODE=ON for Unicode support on Windows (with chcp 65001).
|
||||
|
||||
Finally, once installed, you can also use WebP in your CMake project by doing:
|
||||
|
||||
find_package(WebP)
|
||||
|
||||
which will define the CMake variables WebP_INCLUDE_DIRS and WebP_LIBRARIES.
|
||||
|
||||
Gradle:
|
||||
-------
|
||||
The support for Gradle is minimal: it only helps you compile libwebp, cwebp and
|
||||
@@ -360,6 +369,23 @@ Use following options to convert into alternate image formats:
|
||||
-quiet ....... quiet mode, don't print anything
|
||||
-noasm ....... disable all assembly optimizations
|
||||
|
||||
WebP file analysis tool:
|
||||
========================
|
||||
|
||||
'webpinfo' can be used to print out the chunk level structure and bitstream
|
||||
header information of WebP files. It can also check if the files are of valid
|
||||
WebP format.
|
||||
|
||||
Usage: webpinfo [options] in_files
|
||||
Note: there could be multiple input files;
|
||||
options must come before input files.
|
||||
Options:
|
||||
-version ........... Print version number and exit.
|
||||
-quiet ............. Do not show chunk parsing information.
|
||||
-diag .............. Show parsing error diagnosis.
|
||||
-summary ........... Show chunk stats summary.
|
||||
-bitstream_info .... Parse bitstream header.
|
||||
|
||||
Visualization tool:
|
||||
===================
|
||||
|
||||
@@ -378,12 +404,14 @@ Options are:
|
||||
-nofilter .... disable in-loop filtering
|
||||
-dither <int> dithering strength (0..100), default=50
|
||||
-noalphadither disable alpha plane dithering
|
||||
-usebgcolor .. display background color
|
||||
-mt .......... use multi-threading
|
||||
-info ........ print info
|
||||
-h ........... this help message
|
||||
|
||||
Keyboard shortcuts:
|
||||
'c' ................ toggle use of color profile
|
||||
'b' ................ toggle background color display
|
||||
'i' ................ overlay file information
|
||||
'd' ................ disable blending & disposal (debug)
|
||||
'q' / 'Q' / ESC .... quit
|
||||
@@ -434,6 +462,7 @@ File-level options (only used at the start of compression):
|
||||
-mixed ............... use mixed lossy/lossless automatic mode
|
||||
-v ................... verbose mode
|
||||
-h ................... this help
|
||||
-version ............. print version number and exit
|
||||
|
||||
Per-frame options (only used for subsequent images input):
|
||||
-d <int> ............. frame duration in ms (default: 100)
|
||||
@@ -445,6 +474,9 @@ Per-frame options (only used for subsequent images input):
|
||||
example: img2webp -loop 2 in0.png -lossy in1.jpg
|
||||
-d 80 in2.tiff -o out.webp
|
||||
|
||||
Note: if a single file name is passed as the argument, the arguments will be
|
||||
tokenized from this file. The file name must not start with the character '-'.
|
||||
|
||||
Animated GIF conversion:
|
||||
========================
|
||||
Animated GIF files can be converted to WebP files with animation using the
|
||||
@@ -470,6 +502,8 @@ Options:
|
||||
-metadata <string> ..... comma separated list of metadata to
|
||||
copy from the input to the output if present
|
||||
Valid values: all, none, icc, xmp (default)
|
||||
-loop_compatibility .... use compatibility mode for Chrome
|
||||
version prior to M62 (inclusive)
|
||||
-mt .................... use multi-threading if available
|
||||
|
||||
-version ............... print version number and exit
|
||||
@@ -498,6 +532,11 @@ Options:
|
||||
-min_psnr <float> ... minimum per-frame PSNR
|
||||
-raw_comparison ..... if this flag is not used, RGB is
|
||||
premultiplied before comparison
|
||||
-max_diff <int> ..... maximum allowed difference per channel
|
||||
between corresponding pixels in subsequent
|
||||
frames
|
||||
-h .................. this help
|
||||
-version ............ print version number and exit
|
||||
|
||||
Building:
|
||||
---------
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
__ __ ____ ____ ____ __ __ _ __ __
|
||||
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
|
||||
\ / __/ _ \ __/ / / (_/ /__
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.0
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v1.0.2
|
||||
|
||||
|
||||
Description:
|
||||
@@ -33,6 +33,7 @@ Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT
|
||||
webpmux -info INPUT
|
||||
webpmux [-h|-help]
|
||||
webpmux -version
|
||||
webpmux argument_file_name
|
||||
|
||||
GET_OPTIONS:
|
||||
Extract relevant data:
|
||||
@@ -92,6 +93,9 @@ INPUT & OUTPUT are in WebP format.
|
||||
Note: The nature of EXIF, XMP and ICC data is not checked and is assumed to be
|
||||
valid.
|
||||
|
||||
Note: if a single file name is passed as the argument, the arguments will be
|
||||
tokenized from this file. The file name must not start with the character '-'.
|
||||
|
||||
Visualization tool:
|
||||
===================
|
||||
|
||||
@@ -207,6 +211,35 @@ Code example:
|
||||
For a detailed AnimEncoder API reference, please refer to the header file
|
||||
(src/webp/mux.h).
|
||||
|
||||
AnimDecoder API:
|
||||
================
|
||||
This AnimDecoder API allows decoding (possibly) animated WebP images.
|
||||
|
||||
Code Example:
|
||||
|
||||
WebPAnimDecoderOptions dec_options;
|
||||
WebPAnimDecoderOptionsInit(&dec_options);
|
||||
// Tune 'dec_options' as needed.
|
||||
WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
|
||||
WebPAnimInfo anim_info;
|
||||
WebPAnimDecoderGetInfo(dec, &anim_info);
|
||||
for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
|
||||
while (WebPAnimDecoderHasMoreFrames(dec)) {
|
||||
uint8_t* buf;
|
||||
int timestamp;
|
||||
WebPAnimDecoderGetNext(dec, &buf, ×tamp);
|
||||
// ... (Render 'buf' based on 'timestamp').
|
||||
// ... (Do NOT free 'buf', as it is owned by 'dec').
|
||||
}
|
||||
WebPAnimDecoderReset(dec);
|
||||
}
|
||||
const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
|
||||
// ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
|
||||
WebPAnimDecoderDelete(dec);
|
||||
|
||||
For a detailed AnimDecoder API reference, please refer to the header file
|
||||
(src/webp/demux.h).
|
||||
|
||||
|
||||
Bugs:
|
||||
=====
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
Changes made to pristine libwebp source by Moonchild Productions and mozilla.org developers.
|
||||
|
||||
2017/01/27 -- Synced with libwebp-0.6.0 (BZ #1294490).
|
||||
2018/06/29 -- Synced with libwebp-1.0.0 + BUG=webp:381,383,384.
|
||||
2019/01/21 -- Synced with libwebp-1.0.2
|
||||
@@ -12,9 +12,9 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dec/alphai_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../utils/quant_levels_dec_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
@@ -11,10 +11,10 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_DEC_ALPHAI_H_
|
||||
#define WEBP_DEC_ALPHAI_H_
|
||||
#ifndef WEBP_DEC_ALPHAI_DEC_H_
|
||||
#define WEBP_DEC_ALPHAI_DEC_H_
|
||||
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../utils/filters_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_ALPHAI_H_ */
|
||||
#endif // WEBP_DEC_ALPHAI_DEC_H_
|
||||
|
||||
@@ -13,15 +13,15 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecBuffer
|
||||
|
||||
// Number of bytes per pixel for the different color-spaces.
|
||||
static const int kModeBpp[MODE_LAST] = {
|
||||
static const uint8_t kModeBpp[MODE_LAST] = {
|
||||
3, 4, 3, 4, 4, 2, 2,
|
||||
4, 4, 4, 2, // pre-multiplied modes
|
||||
1, 1 };
|
||||
@@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
|
||||
// strictly speaking, the very last (or first, if flipped) row
|
||||
// doesn't require padding.
|
||||
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
|
||||
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
|
||||
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
|
||||
|
||||
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
int ok = 1;
|
||||
@@ -74,7 +74,8 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
} else { // RGB checks
|
||||
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
|
||||
const int stride = abs(buf->stride);
|
||||
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
|
||||
const uint64_t size =
|
||||
MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
|
||||
ok &= (size <= buf->size);
|
||||
ok &= (stride >= width * kModeBpp[mode]);
|
||||
ok &= (buf->rgba != NULL);
|
||||
@@ -98,9 +99,14 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
|
||||
uint64_t uv_size = 0, a_size = 0, total_size;
|
||||
// We need memory and it hasn't been allocated yet.
|
||||
// => initialize output buffer, now that dimensions are known.
|
||||
const int stride = w * kModeBpp[mode];
|
||||
const uint64_t size = (uint64_t)stride * h;
|
||||
int stride;
|
||||
uint64_t size;
|
||||
|
||||
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
stride = w * kModeBpp[mode];
|
||||
size = (uint64_t)stride * h;
|
||||
if (!WebPIsRGBMode(mode)) {
|
||||
uv_stride = (w + 1) / 2;
|
||||
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
|
||||
@@ -169,11 +175,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
|
||||
return VP8_STATUS_OK;
|
||||
}
|
||||
|
||||
VP8StatusCode WebPAllocateDecBuffer(int w, int h,
|
||||
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
|
||||
const WebPDecoderOptions* const options,
|
||||
WebPDecBuffer* const out) {
|
||||
WebPDecBuffer* const buffer) {
|
||||
VP8StatusCode status;
|
||||
if (out == NULL || w <= 0 || h <= 0) {
|
||||
if (buffer == NULL || width <= 0 || height <= 0) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
if (options != NULL) { // First, apply options if there is any.
|
||||
@@ -182,33 +188,39 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
|
||||
const int ch = options->crop_height;
|
||||
const int x = options->crop_left & ~1;
|
||||
const int y = options->crop_top & ~1;
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
|
||||
x + cw > width || y + ch > height) {
|
||||
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
|
||||
}
|
||||
w = cw;
|
||||
h = ch;
|
||||
width = cw;
|
||||
height = ch;
|
||||
}
|
||||
|
||||
if (options->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
int scaled_width = options->scaled_width;
|
||||
int scaled_height = options->scaled_height;
|
||||
if (!WebPRescalerGetScaledDimensions(
|
||||
w, h, &scaled_width, &scaled_height)) {
|
||||
width, height, &scaled_width, &scaled_height)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
w = scaled_width;
|
||||
h = scaled_height;
|
||||
width = scaled_width;
|
||||
height = scaled_height;
|
||||
#else
|
||||
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
|
||||
#endif
|
||||
}
|
||||
}
|
||||
out->width = w;
|
||||
out->height = h;
|
||||
buffer->width = width;
|
||||
buffer->height = height;
|
||||
|
||||
// Then, allocate buffer for real.
|
||||
status = AllocateBuffer(out);
|
||||
status = AllocateBuffer(buffer);
|
||||
if (status != VP8_STATUS_OK) return status;
|
||||
|
||||
// Use the stride trick if vertical flip is needed.
|
||||
if (options != NULL && options->flip) {
|
||||
status = WebPFlipBuffer(out);
|
||||
status = WebPFlipBuffer(buffer);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_COMMON_H_
|
||||
#define WEBP_DEC_COMMON_H_
|
||||
#ifndef WEBP_DEC_COMMON_DEC_H_
|
||||
#define WEBP_DEC_COMMON_DEC_H_
|
||||
|
||||
// intra prediction modes
|
||||
enum { B_DC_PRED = 0, // 4x4 modes
|
||||
@@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
||||
NUM_PROBAS = 11
|
||||
};
|
||||
|
||||
#endif // WEBP_DEC_COMMON_H_
|
||||
#endif // WEBP_DEC_COMMON_DEC_H_
|
||||
|
||||
@@ -12,13 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main reconstruction function.
|
||||
|
||||
static const int kScan[16] = {
|
||||
static const uint16_t kScan[16] = {
|
||||
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
|
||||
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
|
||||
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
|
||||
@@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||
#define MIN_DITHER_AMP 4
|
||||
|
||||
#define DITHER_AMP_TAB_SIZE 12
|
||||
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
// roughly, it's dqm->uv_mat_[1]
|
||||
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
|
||||
};
|
||||
@@ -338,7 +338,6 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
|
||||
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
||||
VP8QuantMatrix* const dqm = &dec->dqm_[s];
|
||||
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
|
||||
// TODO(skal): should we specially dither more for uv_quant_ < 0?
|
||||
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
|
||||
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
|
||||
}
|
||||
@@ -400,7 +399,9 @@ static void DitherRow(VP8Decoder* const dec) {
|
||||
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
|
||||
|
||||
// Finalize and transmit a complete row. Return false in case of user-abort.
|
||||
static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
|
||||
static int FinishRow(void* arg1, void* arg2) {
|
||||
VP8Decoder* const dec = (VP8Decoder*)arg1;
|
||||
VP8Io* const io = (VP8Io*)arg2;
|
||||
int ok = 1;
|
||||
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
|
||||
const int cache_id = ctx->id_;
|
||||
@@ -448,10 +449,9 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
|
||||
if (y_end > io->crop_bottom) {
|
||||
y_end = io->crop_bottom; // make sure we don't overflow on last row.
|
||||
}
|
||||
// If dec->alpha_data_ is not NULL, we have some alpha plane present.
|
||||
io->a = NULL;
|
||||
if (dec->alpha_data_ != NULL && y_start < y_end) {
|
||||
// TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
|
||||
// good idea.
|
||||
io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
|
||||
if (io->a == NULL) {
|
||||
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
|
||||
@@ -558,7 +558,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
||||
if (io->bypass_filtering) {
|
||||
dec->filter_type_ = 0;
|
||||
}
|
||||
// TODO(skal): filter type / strength / sharpness forcing
|
||||
|
||||
// Define the area where we can skip in-loop filtering, in case of cropping.
|
||||
//
|
||||
@@ -569,8 +568,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
||||
// Means: there's a dependency chain that goes all the way up to the
|
||||
// top-left corner of the picture (MB #0). We must filter all the previous
|
||||
// macroblocks.
|
||||
// TODO(skal): add an 'approximate_decoding' option, that won't produce
|
||||
// a 1:1 bit-exactness for complex filtering?
|
||||
{
|
||||
const int extra_pixels = kFilterExtraRows[dec->filter_type_];
|
||||
if (dec->filter_type_ == 2) {
|
||||
@@ -651,7 +648,7 @@ static int InitThreadContext(VP8Decoder* const dec) {
|
||||
}
|
||||
worker->data1 = dec;
|
||||
worker->data2 = (void*)&dec->thread_ctx_.io_;
|
||||
worker->hook = (WebPWorkerHook)FinishRow;
|
||||
worker->hook = FinishRow;
|
||||
dec->num_caches_ =
|
||||
(dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
|
||||
} else {
|
||||
@@ -671,15 +668,9 @@ int VP8GetThreadMethod(const WebPDecoderOptions* const options,
|
||||
(void)height;
|
||||
assert(headers == NULL || !headers->is_lossless);
|
||||
#if defined(WEBP_USE_THREAD)
|
||||
if (width < MIN_WIDTH_FOR_THREADS) return 0;
|
||||
// TODO(skal): tune the heuristic further
|
||||
#if 0
|
||||
if (height < 2 * width) return 2;
|
||||
if (width >= MIN_WIDTH_FOR_THREADS) return 2;
|
||||
#endif
|
||||
return 2;
|
||||
#else // !WEBP_USE_THREAD
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef MT_CACHE_LINES
|
||||
@@ -728,7 +719,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
}
|
||||
|
||||
mem = (uint8_t*)dec->mem_;
|
||||
dec->intra_t_ = (uint8_t*)mem;
|
||||
dec->intra_t_ = mem;
|
||||
mem += intra_pred_mode_size;
|
||||
|
||||
dec->yuv_t_ = (VP8TopSamples*)mem;
|
||||
@@ -750,7 +741,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
|
||||
mem = (uint8_t*)WEBP_ALIGN(mem);
|
||||
assert((yuv_size & WEBP_ALIGN_CST) == 0);
|
||||
dec->yuv_b_ = (uint8_t*)mem;
|
||||
dec->yuv_b_ = mem;
|
||||
mem += yuv_size;
|
||||
|
||||
dec->mb_data_ = (VP8MBData*)mem;
|
||||
@@ -766,7 +757,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
const int extra_rows = kFilterExtraRows[dec->filter_type_];
|
||||
const int extra_y = extra_rows * dec->cache_y_stride_;
|
||||
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
|
||||
dec->cache_y_ = ((uint8_t*)mem) + extra_y;
|
||||
dec->cache_y_ = mem + extra_y;
|
||||
dec->cache_u_ = dec->cache_y_
|
||||
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
|
||||
dec->cache_v_ = dec->cache_u_
|
||||
@@ -776,7 +767,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
mem += cache_size;
|
||||
|
||||
// alpha plane
|
||||
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
|
||||
dec->alpha_plane_ = alpha_size ? mem : NULL;
|
||||
mem += alpha_size;
|
||||
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
|
||||
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../dec/alphai_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
// In append mode, buffer allocations increase as multiples of this value.
|
||||
@@ -140,10 +140,9 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
|
||||
if (NeedCompressedAlpha(idec)) {
|
||||
ALPHDecoder* const alph_dec = dec->alph_dec_;
|
||||
dec->alpha_data_ += offset;
|
||||
if (alph_dec != NULL) {
|
||||
if (alph_dec != NULL && alph_dec->vp8l_dec_ != NULL) {
|
||||
if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
|
||||
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
|
||||
assert(alph_vp8l_dec != NULL);
|
||||
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
|
||||
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
|
||||
dec->alpha_data_ + ALPHA_HEADER_LEN,
|
||||
@@ -283,10 +282,8 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
|
||||
|
||||
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
|
||||
if (idec->state_ == STATE_VP8_DATA) {
|
||||
VP8Io* const io = &idec->io_;
|
||||
if (io->teardown != NULL) {
|
||||
io->teardown(io);
|
||||
}
|
||||
// Synchronize the thread, clean-up and check for errors.
|
||||
VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
|
||||
}
|
||||
idec->state_ = STATE_ERROR;
|
||||
return error;
|
||||
@@ -451,7 +448,10 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
|
||||
VP8Io* const io = &idec->io_;
|
||||
|
||||
assert(dec->ready_);
|
||||
// Make sure partition #0 has been read before, to set dec to ready_.
|
||||
if (!dec->ready_) {
|
||||
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
|
||||
if (idec->last_mb_y_ != dec->mb_y_) {
|
||||
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
|
||||
@@ -473,6 +473,12 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
|
||||
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
// Synchronize the threads.
|
||||
if (dec->mt_method_ > 0) {
|
||||
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) {
|
||||
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
}
|
||||
RestoreContext(&context, dec, token_br);
|
||||
return VP8_STATUS_SUSPENDED;
|
||||
}
|
||||
@@ -491,6 +497,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
}
|
||||
// Synchronize the thread and check for errors.
|
||||
if (!VP8ExitCritical(dec, io)) {
|
||||
idec->state_ = STATE_ERROR; // prevent re-entry in IDecError
|
||||
return IDecError(idec, VP8_STATUS_USER_ABORT);
|
||||
}
|
||||
dec->ready_ = 0;
|
||||
@@ -571,6 +578,10 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
|
||||
status = DecodePartition0(idec);
|
||||
}
|
||||
if (idec->state_ == STATE_VP8_DATA) {
|
||||
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
|
||||
if (dec == NULL) {
|
||||
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
|
||||
}
|
||||
status = DecodeRemaining(idec);
|
||||
}
|
||||
if (idec->state_ == STATE_VP8L_HEADER) {
|
||||
@@ -673,12 +684,12 @@ void WebPIDelete(WebPIDecoder* idec) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Wrapper toward WebPINewDecoder
|
||||
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
|
||||
size_t output_buffer_size, int output_stride) {
|
||||
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
|
||||
WebPIDecoder* idec;
|
||||
|
||||
if (mode >= MODE_YUV) return NULL;
|
||||
if (csp >= MODE_YUV) return NULL;
|
||||
if (is_external_memory == 0) { // Overwrite parameters to sane values.
|
||||
output_buffer_size = 0;
|
||||
output_stride = 0;
|
||||
@@ -689,7 +700,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
|
||||
}
|
||||
idec = WebPINewDecoder(NULL);
|
||||
if (idec == NULL) return NULL;
|
||||
idec->output_.colorspace = mode;
|
||||
idec->output_.colorspace = csp;
|
||||
idec->output_.is_external_memory = is_external_memory;
|
||||
idec->output_.u.RGBA.rgba = output_buffer;
|
||||
idec->output_.u.RGBA.stride = output_stride;
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/yuv.h"
|
||||
#include "../utils/utils.h"
|
||||
@@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
int num_rows;
|
||||
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@@ -241,6 +241,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
//------------------------------------------------------------------------------
|
||||
// YUV rescaling (no final RGB conversion needed)
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int Rescale(const uint8_t* src, int src_stride,
|
||||
int new_lines, WebPRescaler* const wrk) {
|
||||
int num_lines_out = 0;
|
||||
@@ -431,7 +432,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
|
||||
int max_lines_out) {
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@@ -541,6 +542,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Default custom functions
|
||||
|
||||
@@ -561,10 +564,14 @@ static int CustomSetup(VP8Io* io) {
|
||||
WebPInitUpsamplers();
|
||||
}
|
||||
if (io->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
|
||||
if (!ok) {
|
||||
return 0; // memory error
|
||||
}
|
||||
#else
|
||||
return 0; // rescaling support not compiled
|
||||
#endif
|
||||
} else {
|
||||
if (is_rgb) {
|
||||
WebPInitSamplers();
|
||||
@@ -598,9 +605,6 @@ static int CustomSetup(VP8Io* io) {
|
||||
}
|
||||
}
|
||||
|
||||
if (is_rgb) {
|
||||
VP8YUVInit();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
|
||||
static WEBP_INLINE int clip(int v, int M) {
|
||||
return v < 0 ? 0 : v > M ? M : v;
|
||||
|
||||
@@ -11,15 +11,19 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
#define USE_GENERIC_TREE 0
|
||||
#endif
|
||||
#endif // USE_GENERIC_TREE
|
||||
|
||||
#ifdef USE_GENERIC_TREE
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
static const int8_t kYModesIntra4[18] = {
|
||||
-B_DC_PRED, 1,
|
||||
-B_TM_PRED, 2,
|
||||
@@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
int x;
|
||||
for (x = 0; x < 4; ++x) {
|
||||
const uint8_t* const prob = kBModesProba[top[x]][ymode];
|
||||
#ifdef USE_GENERIC_TREE
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
// Generic tree-parsing
|
||||
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
|
||||
while (i > 0) {
|
||||
@@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
(!VP8GetBit(br, prob[6]) ? B_LD_PRED :
|
||||
(!VP8GetBit(br, prob[7]) ? B_VL_PRED :
|
||||
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
|
||||
#endif // USE_GENERIC_TREE
|
||||
#endif // USE_GENERIC_TREE
|
||||
top[x] = ymode;
|
||||
}
|
||||
memcpy(modes, top, 4 * sizeof(*top));
|
||||
@@ -498,7 +502,7 @@ static const uint8_t
|
||||
|
||||
// Paragraph 9.9
|
||||
|
||||
static const int kBands[16 + 1] = {
|
||||
static const uint8_t kBands[16 + 1] = {
|
||||
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
0 // extra entry as sentinel
|
||||
};
|
||||
|
||||
@@ -13,10 +13,10 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/alphai_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
@@ -491,7 +491,7 @@ static int GetCoeffsAlt(VP8BitReader* const br,
|
||||
return 16;
|
||||
}
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) {
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) {
|
||||
if (GetCoeffs == NULL) {
|
||||
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
|
||||
GetCoeffs = GetCoeffsAlt;
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_WEBP_DECODE_VP8_H_
|
||||
#define WEBP_WEBP_DECODE_VP8_H_
|
||||
#ifndef WEBP_DEC_VP8_DEC_H_
|
||||
#define WEBP_DEC_VP8_DEC_H_
|
||||
|
||||
#include "../webp/decode.h"
|
||||
|
||||
@@ -33,7 +33,7 @@ extern "C" {
|
||||
// /* customize io's functions (setup()/put()/teardown()) if needed. */
|
||||
//
|
||||
// VP8Decoder* dec = VP8New();
|
||||
// bool ok = VP8Decode(dec);
|
||||
// int ok = VP8Decode(dec, &io);
|
||||
// if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
|
||||
// VP8Delete(dec);
|
||||
// return ok;
|
||||
@@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
|
||||
// Miscellaneous VP8/VP8L bitstream probing functions.
|
||||
|
||||
// Returns true if the next 3 bytes in data contain the VP8 signature.
|
||||
WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
|
||||
// Validates the VP8 data-header and retrieves basic header information viz
|
||||
// width and height. Returns 0 in case of formatting error. *width/*height
|
||||
// can be passed NULL.
|
||||
WEBP_EXTERN(int) VP8GetInfo(
|
||||
WEBP_EXTERN int VP8GetInfo(
|
||||
const uint8_t* data,
|
||||
size_t data_size, // data available so far
|
||||
size_t chunk_size, // total data size expected in the chunk
|
||||
int* const width, int* const height);
|
||||
|
||||
// Returns true if the next byte(s) in data is a VP8L signature.
|
||||
WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
|
||||
// Validates the VP8L data-header and retrieves basic header information viz
|
||||
// width, height and alpha. Returns 0 in case of formatting error.
|
||||
// width/height/has_alpha can be passed NULL.
|
||||
WEBP_EXTERN(int) VP8LGetInfo(
|
||||
WEBP_EXTERN int VP8LGetInfo(
|
||||
const uint8_t* data, size_t data_size, // data available so far
|
||||
int* const width, int* const height, int* const has_alpha);
|
||||
|
||||
@@ -182,4 +182,4 @@ WEBP_EXTERN(int) VP8LGetInfo(
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_DECODE_VP8_H_ */
|
||||
#endif // WEBP_DEC_VP8_DEC_H_
|
||||
|
||||
@@ -11,12 +11,12 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8I_H_
|
||||
#define WEBP_DEC_VP8I_H_
|
||||
#ifndef WEBP_DEC_VP8I_DEC_H_
|
||||
#define WEBP_DEC_VP8I_DEC_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "./common_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dec/common_dec.h"
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/random_utils.h"
|
||||
#include "../utils/thread_utils.h"
|
||||
@@ -30,9 +30,9 @@ extern "C" {
|
||||
// Various defines and enums
|
||||
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 0
|
||||
#define DEC_MIN_VERSION 6
|
||||
#define DEC_REV_VERSION 0
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 0
|
||||
#define DEC_REV_VERSION 2
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||
@@ -57,7 +57,6 @@ extern "C" {
|
||||
// '|' = left sample, '-' = top sample, '+' = top-left sample
|
||||
// 't' = extra top-right sample for 4x4 modes
|
||||
#define YUV_SIZE (BPS * 17 + BPS * 9)
|
||||
#define Y_SIZE (BPS * 17)
|
||||
#define Y_OFF (BPS * 1 + 8)
|
||||
#define U_OFF (Y_OFF + BPS * 16 + BPS)
|
||||
#define V_OFF (U_OFF + 16)
|
||||
@@ -317,4 +316,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8I_H_ */
|
||||
#endif // WEBP_DEC_VP8I_DEC_H_
|
||||
|
||||
+108
-37
@@ -14,8 +14,8 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dec/alphai_dec.h"
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/lossless_common.h"
|
||||
@@ -28,8 +28,8 @@
|
||||
|
||||
static const int kCodeLengthLiterals = 16;
|
||||
static const int kCodeLengthRepeatCode = 16;
|
||||
static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Five Huffman codes are used at each meta code:
|
||||
@@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
|
||||
// All values computed for 8-bit first level lookup with Mark Adler's tool:
|
||||
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
|
||||
#define FIXED_TABLE_SIZE (630 * 3 + 410)
|
||||
static const int kTableSize[12] = {
|
||||
static const uint16_t kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 654,
|
||||
FIXED_TABLE_SIZE + 656,
|
||||
FIXED_TABLE_SIZE + 658,
|
||||
@@ -362,12 +362,19 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
VP8LMetadata* const hdr = &dec->hdr_;
|
||||
uint32_t* huffman_image = NULL;
|
||||
HTreeGroup* htree_groups = NULL;
|
||||
// When reading htrees, some might be unused, as the format allows it.
|
||||
// We will still read them but put them in this htree_group_bogus.
|
||||
HTreeGroup htree_group_bogus;
|
||||
HuffmanCode* huffman_tables = NULL;
|
||||
HuffmanCode* huffman_tables_bogus = NULL;
|
||||
HuffmanCode* next = NULL;
|
||||
int num_htree_groups = 1;
|
||||
int num_htree_groups_max = 1;
|
||||
int max_alphabet_size = 0;
|
||||
int* code_lengths = NULL;
|
||||
const int table_size = kTableSize[color_cache_bits];
|
||||
int* mapping = NULL;
|
||||
int ok = 0;
|
||||
|
||||
if (allow_recursion && VP8LReadBits(br, 1)) {
|
||||
// use meta Huffman codes.
|
||||
@@ -384,10 +391,42 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
// The huffman data is stored in red and green bytes.
|
||||
const int group = (huffman_image[i] >> 8) & 0xffff;
|
||||
huffman_image[i] = group;
|
||||
if (group >= num_htree_groups) {
|
||||
num_htree_groups = group + 1;
|
||||
if (group >= num_htree_groups_max) {
|
||||
num_htree_groups_max = group + 1;
|
||||
}
|
||||
}
|
||||
// Check the validity of num_htree_groups_max. If it seems too big, use a
|
||||
// smaller value for later. This will prevent big memory allocations to end
|
||||
// up with a bad bitstream anyway.
|
||||
// The value of 1000 is totally arbitrary. We know that num_htree_groups_max
|
||||
// is smaller than (1 << 16) and should be smaller than the number of pixels
|
||||
// (though the format allows it to be bigger).
|
||||
if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
|
||||
// Create a mapping from the used indices to the minimal set of used
|
||||
// values [0, num_htree_groups)
|
||||
mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
|
||||
if (mapping == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
goto Error;
|
||||
}
|
||||
// -1 means a value is unmapped, and therefore unused in the Huffman
|
||||
// image.
|
||||
memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
|
||||
for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
|
||||
// Get the current mapping for the group and remap the Huffman image.
|
||||
int* const mapped_group = &mapping[huffman_image[i]];
|
||||
if (*mapped_group == -1) *mapped_group = num_htree_groups++;
|
||||
huffman_image[i] = *mapped_group;
|
||||
}
|
||||
huffman_tables_bogus = (HuffmanCode*)WebPSafeMalloc(
|
||||
table_size, sizeof(*huffman_tables_bogus));
|
||||
if (huffman_tables_bogus == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
goto Error;
|
||||
}
|
||||
} else {
|
||||
num_htree_groups = num_htree_groups_max;
|
||||
}
|
||||
}
|
||||
|
||||
if (br->eos_) goto Error;
|
||||
@@ -403,11 +442,11 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
}
|
||||
}
|
||||
|
||||
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
|
||||
sizeof(*code_lengths));
|
||||
huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
|
||||
sizeof(*huffman_tables));
|
||||
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
|
||||
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
|
||||
sizeof(*code_lengths));
|
||||
|
||||
if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
@@ -415,28 +454,35 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
}
|
||||
|
||||
next = huffman_tables;
|
||||
for (i = 0; i < num_htree_groups; ++i) {
|
||||
HTreeGroup* const htree_group = &htree_groups[i];
|
||||
for (i = 0; i < num_htree_groups_max; ++i) {
|
||||
// If the index "i" is unused in the Huffman image, read the coefficients
|
||||
// but store them to a bogus htree_group.
|
||||
const int is_bogus = (mapping != NULL && mapping[i] == -1);
|
||||
HTreeGroup* const htree_group =
|
||||
is_bogus ? &htree_group_bogus :
|
||||
&htree_groups[(mapping == NULL) ? i : mapping[i]];
|
||||
HuffmanCode** const htrees = htree_group->htrees;
|
||||
HuffmanCode* huffman_tables_i = is_bogus ? huffman_tables_bogus : next;
|
||||
int size;
|
||||
int total_size = 0;
|
||||
int is_trivial_literal = 1;
|
||||
int max_bits = 0;
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
htrees[j] = next;
|
||||
htrees[j] = huffman_tables_i;
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += 1 << color_cache_bits;
|
||||
}
|
||||
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
|
||||
size =
|
||||
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables_i);
|
||||
if (size == 0) {
|
||||
goto Error;
|
||||
}
|
||||
if (is_trivial_literal && kLiteralMap[j] == 1) {
|
||||
is_trivial_literal = (next->bits == 0);
|
||||
is_trivial_literal = (huffman_tables_i->bits == 0);
|
||||
}
|
||||
total_size += next->bits;
|
||||
next += size;
|
||||
total_size += huffman_tables_i->bits;
|
||||
huffman_tables_i += size;
|
||||
if (j <= ALPHA) {
|
||||
int local_max_bits = code_lengths[0];
|
||||
int k;
|
||||
@@ -448,43 +494,47 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
max_bits += local_max_bits;
|
||||
}
|
||||
}
|
||||
if (!is_bogus) next = huffman_tables_i;
|
||||
htree_group->is_trivial_literal = is_trivial_literal;
|
||||
htree_group->is_trivial_code = 0;
|
||||
if (is_trivial_literal) {
|
||||
const int red = htrees[RED][0].value;
|
||||
const int blue = htrees[BLUE][0].value;
|
||||
const int alpha = htrees[ALPHA][0].value;
|
||||
htree_group->literal_arb =
|
||||
((uint32_t)alpha << 24) | (red << 16) | blue;
|
||||
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
|
||||
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
|
||||
htree_group->is_trivial_code = 1;
|
||||
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
|
||||
}
|
||||
}
|
||||
htree_group->use_packed_table = !htree_group->is_trivial_code &&
|
||||
(max_bits < HUFFMAN_PACKED_BITS);
|
||||
htree_group->use_packed_table =
|
||||
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
|
||||
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
|
||||
}
|
||||
WebPSafeFree(code_lengths);
|
||||
ok = 1;
|
||||
|
||||
// All OK. Finalize pointers and return.
|
||||
// All OK. Finalize pointers.
|
||||
hdr->huffman_image_ = huffman_image;
|
||||
hdr->num_htree_groups_ = num_htree_groups;
|
||||
hdr->htree_groups_ = htree_groups;
|
||||
hdr->huffman_tables_ = huffman_tables;
|
||||
return 1;
|
||||
|
||||
Error:
|
||||
WebPSafeFree(code_lengths);
|
||||
WebPSafeFree(huffman_image);
|
||||
WebPSafeFree(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
return 0;
|
||||
WebPSafeFree(huffman_tables_bogus);
|
||||
WebPSafeFree(mapping);
|
||||
if (!ok) {
|
||||
WebPSafeFree(huffman_image);
|
||||
WebPSafeFree(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scaling.
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
const int num_channels = 4;
|
||||
const int in_width = io->mb_w;
|
||||
@@ -516,10 +566,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
out_width, out_height, 0, num_channels, work);
|
||||
return 1;
|
||||
}
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Export to ARGB
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
// We have special "export" function since we need to convert from BGRA
|
||||
static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
|
||||
int rgba_stride, uint8_t* const rgba) {
|
||||
@@ -561,6 +614,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
|
||||
return num_lines_out;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
// Emit rows without any scaling.
|
||||
static int EmitRows(WEBP_CSP_MODE colorspace,
|
||||
const uint8_t* row_in, int in_stride,
|
||||
@@ -746,9 +801,12 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
|
||||
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
|
||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
|
||||
const int num_rows_out = io->use_scaling ?
|
||||
const int num_rows_out =
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
io->use_scaling ?
|
||||
EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
|
||||
rgba, buf->stride) :
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
EmitRows(output->colorspace, rows_data, in_stride,
|
||||
io->mb_w, io->mb_h, rgba, buf->stride);
|
||||
// Update 'last_out_row_'.
|
||||
@@ -875,7 +933,11 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
|
||||
#endif
|
||||
break;
|
||||
case 2:
|
||||
#if !defined(WORDS_BIGENDIAN)
|
||||
memcpy(&pattern, src, sizeof(uint16_t));
|
||||
#else
|
||||
pattern = ((uint32_t)src[0] << 8) | src[1];
|
||||
#endif
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
pattern |= pattern << 16;
|
||||
#elif defined(WEBP_USE_MIPS_DSP_R2)
|
||||
@@ -1012,12 +1074,13 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
||||
ok = 0;
|
||||
goto End;
|
||||
}
|
||||
assert(br->eos_ == VP8LIsEndOfStream(br));
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
}
|
||||
// Process the remaining rows corresponding to last row-block.
|
||||
ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
|
||||
|
||||
End:
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
if (!ok || (br->eos_ && pos < end)) {
|
||||
ok = 0;
|
||||
dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
|
||||
@@ -1090,11 +1153,12 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
VP8LFillBitWindow(br);
|
||||
if (htree_group->use_packed_table) {
|
||||
code = ReadPackedSymbols(htree_group, br, src);
|
||||
if (VP8LIsEndOfStream(br)) break;
|
||||
if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne;
|
||||
} else {
|
||||
code = ReadSymbol(htree_group->htrees[GREEN], br);
|
||||
}
|
||||
if (br->eos_) break; // early out
|
||||
if (VP8LIsEndOfStream(br)) break;
|
||||
if (code < NUM_LITERAL_CODES) { // Literal
|
||||
if (htree_group->is_trivial_literal) {
|
||||
*src = htree_group->literal_arb | (code << 8);
|
||||
@@ -1104,7 +1168,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
VP8LFillBitWindow(br);
|
||||
blue = ReadSymbol(htree_group->htrees[BLUE], br);
|
||||
alpha = ReadSymbol(htree_group->htrees[ALPHA], br);
|
||||
if (br->eos_) break;
|
||||
if (VP8LIsEndOfStream(br)) break;
|
||||
*src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue;
|
||||
}
|
||||
AdvanceByOne:
|
||||
@@ -1132,7 +1196,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
VP8LFillBitWindow(br);
|
||||
dist_code = GetCopyDistance(dist_symbol, br);
|
||||
dist = PlaneCodeToDistance(width, dist_code);
|
||||
if (br->eos_) break;
|
||||
if (VP8LIsEndOfStream(br)) break;
|
||||
if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
|
||||
goto Error;
|
||||
} else {
|
||||
@@ -1169,9 +1233,9 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
} else { // Not reached
|
||||
goto Error;
|
||||
}
|
||||
assert(br->eos_ == VP8LIsEndOfStream(br));
|
||||
}
|
||||
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
if (dec->incremental_ && br->eos_ && src < src_end) {
|
||||
RestoreState(dec);
|
||||
} else if (!br->eos_) {
|
||||
@@ -1512,7 +1576,6 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
|
||||
if (dec == NULL) return 0;
|
||||
|
||||
assert(alph_dec != NULL);
|
||||
alph_dec->vp8l_dec_ = dec;
|
||||
|
||||
dec->width_ = alph_dec->width_;
|
||||
dec->height_ = alph_dec->height_;
|
||||
@@ -1544,11 +1607,12 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
|
||||
|
||||
if (!ok) goto Err;
|
||||
|
||||
// Only set here, once we are sure it is valid (to avoid thread races).
|
||||
alph_dec->vp8l_dec_ = dec;
|
||||
return 1;
|
||||
|
||||
Err:
|
||||
VP8LDelete(alph_dec->vp8l_dec_);
|
||||
alph_dec->vp8l_dec_ = NULL;
|
||||
VP8LDelete(dec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1630,12 +1694,19 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
|
||||
if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
|
||||
|
||||
#else
|
||||
if (io->use_scaling) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
goto Err;
|
||||
}
|
||||
#endif
|
||||
if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
|
||||
// need the alpha-multiply functions for premultiplied output or rescaling
|
||||
WebPInitAlphaProcessing();
|
||||
}
|
||||
|
||||
if (!WebPIsRGBMode(dec->output_->colorspace)) {
|
||||
WebPInitConvertARGBToYUV();
|
||||
if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
|
||||
|
||||
@@ -12,11 +12,11 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
// Vikas Arora(vikaas.arora@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8LI_H_
|
||||
#define WEBP_DEC_VP8LI_H_
|
||||
#ifndef WEBP_DEC_VP8LI_DEC_H_
|
||||
#define WEBP_DEC_VP8LI_DEC_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/color_cache_utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
@@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8LI_H_ */
|
||||
#endif // WEBP_DEC_VP8LI_DEC_H_
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../dec/webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/mux_types.h" // ALPHA_FLAG
|
||||
|
||||
@@ -421,7 +421,9 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
|
||||
NULL, NULL, NULL, &has_animation,
|
||||
NULL, headers);
|
||||
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
|
||||
// TODO(jzern): full support of animation frames will require API additions.
|
||||
// The WebPDemux API + libwebp can be used to decode individual
|
||||
// uncomposited frames or the WebPAnimDecoder can be used to fully
|
||||
// reconstruct them (see webp/demux.h).
|
||||
if (has_animation) {
|
||||
status = VP8_STATUS_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
@@ -11,15 +11,15 @@
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#ifndef WEBP_DEC_WEBPI_H_
|
||||
#define WEBP_DEC_WEBPI_H_
|
||||
#ifndef WEBP_DEC_WEBPI_DEC_H_
|
||||
#define WEBP_DEC_WEBPI_DEC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "../utils/rescaler_utils.h"
|
||||
#include "./vp8_dec.h"
|
||||
#include "../dec/vp8_dec.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecParams: Decoding output parameters. Transient internal object.
|
||||
@@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_WEBPI_H_ */
|
||||
#endif // WEBP_DEC_WEBPI_DEC_H_
|
||||
|
||||
@@ -23,8 +23,8 @@
|
||||
#include "../webp/demux.h"
|
||||
#include "../webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 0
|
||||
#define DMUX_MIN_VERSION 3
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 0
|
||||
#define DMUX_REV_VERSION 2
|
||||
|
||||
typedef struct {
|
||||
@@ -205,12 +205,14 @@ static void SetFrameInfo(size_t start_offset, size_t size,
|
||||
frame->complete_ = complete;
|
||||
}
|
||||
|
||||
// Store image bearing chunks to 'frame'.
|
||||
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
|
||||
// requirement, it may be zero.
|
||||
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
|
||||
MemBuffer* const mem, Frame* const frame) {
|
||||
int alpha_chunks = 0;
|
||||
int image_chunks = 0;
|
||||
int done = (MemDataSize(mem) < min_size);
|
||||
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
|
||||
MemDataSize(mem) < min_size);
|
||||
ParseStatus status = PARSE_OK;
|
||||
|
||||
if (done) return PARSE_NEED_MORE_DATA;
|
||||
@@ -401,9 +403,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
|
||||
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
|
||||
if (frame == NULL) return PARSE_ERROR;
|
||||
|
||||
// For the single image case we allow parsing of a partial frame, but we need
|
||||
// at least CHUNK_HEADER_SIZE for parsing.
|
||||
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
|
||||
// For the single image case we allow parsing of a partial frame, so no
|
||||
// minimum size is imposed here.
|
||||
status = StoreFrame(1, 0, &dmux->mem_, frame);
|
||||
if (status != PARSE_ERROR) {
|
||||
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
|
||||
// Clear any alpha when the alpha flag is missing.
|
||||
|
||||
@@ -12,10 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
// Tables can be faster on some platform but incur some extra binary size (~2k).
|
||||
// #define USE_TABLES_FOR_ALPHA_MULT
|
||||
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
|
||||
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
|
||||
return v;
|
||||
}
|
||||
|
||||
#ifdef USE_TABLES_FOR_ALPHA_MULT
|
||||
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
|
||||
|
||||
static const uint32_t kMultTables[2][256] = {
|
||||
{ // (255u << MFIX) / alpha
|
||||
@@ -132,9 +135,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
|
||||
return inverse ? (255u << MFIX) / a : a * KINV_255;
|
||||
}
|
||||
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t argb = ptr[x];
|
||||
@@ -154,8 +157,8 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t a = alpha[x];
|
||||
@@ -217,8 +220,9 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
|
||||
#endif
|
||||
|
||||
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
while (h-- > 0) {
|
||||
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
|
||||
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
|
||||
@@ -235,6 +239,7 @@ static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
|
||||
rgba += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
@@ -254,9 +259,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
|
||||
return (x * m) >> 16;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
while (h-- > 0) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
@@ -275,15 +280,16 @@ static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
|
||||
}
|
||||
#undef MULTIPLIER
|
||||
|
||||
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
|
||||
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
|
||||
#else
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
@@ -338,6 +344,46 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int HasAlpha8b_C(const uint8_t* src, int length) {
|
||||
while (length-- > 0) if (*src++ != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_C(const uint8_t* src, int length) {
|
||||
int x;
|
||||
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple channel manipulations.
|
||||
|
||||
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
||||
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
|
||||
}
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int i, offset = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
|
||||
offset += step;
|
||||
}
|
||||
}
|
||||
|
||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||
@@ -345,6 +391,15 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int, uint32_t*);
|
||||
#endif
|
||||
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init function
|
||||
@@ -354,21 +409,25 @@ extern void WebPInitAlphaProcessingSSE2(void);
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
extern void WebPInitAlphaProcessingNEON(void);
|
||||
|
||||
static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPMultARGBRow = WebPMultARGBRowC;
|
||||
WebPMultRow = WebPMultRowC;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
||||
WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
|
||||
WebPMultARGBRow = WebPMultARGBRow_C;
|
||||
WebPMultRow = WebPMultRow_C;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
WebPPackARGB = PackARGB_C;
|
||||
#endif
|
||||
WebPPackRGB = PackRGB_C;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
|
||||
WebPDispatchAlpha = DispatchAlpha_C;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
|
||||
WebPExtractAlpha = ExtractAlpha_C;
|
||||
WebPExtractGreen = ExtractGreen_C;
|
||||
#endif
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_C;
|
||||
WebPHasAlpha32b = HasAlpha32b_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@@ -382,16 +441,32 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
WebPInitAlphaProcessingMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPMultARGBRow != NULL);
|
||||
assert(WebPMultRow != NULL);
|
||||
assert(WebPApplyAlphaMultiply != NULL);
|
||||
assert(WebPApplyAlphaMultiply4444 != NULL);
|
||||
assert(WebPDispatchAlpha != NULL);
|
||||
assert(WebPDispatchAlphaToGreen != NULL);
|
||||
assert(WebPExtractAlpha != NULL);
|
||||
assert(WebPExtractGreen != NULL);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
assert(WebPPackARGB != NULL);
|
||||
#endif
|
||||
assert(WebPPackRGB != NULL);
|
||||
assert(WebPHasAlpha8b != NULL);
|
||||
assert(WebPHasAlpha32b != NULL);
|
||||
}
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@@ -72,9 +72,9 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_and != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@@ -210,6 +210,61 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Alpha detection
|
||||
|
||||
static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
for (; i + 16 <= length; i += 16) {
|
||||
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i < length; ++i) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i alpha_mask = _mm_set1_epi32(0xff);
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
// We don't know if we can access the last 3 bytes after the last alpha
|
||||
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
|
||||
// or the last byte of the quadruplet). Hence the '-3' protection below.
|
||||
length = length * 4 - 3; // size in bytes
|
||||
for (; i + 64 <= length; i += 64) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i b2 = _mm_and_si128(a2, alpha_mask);
|
||||
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
|
||||
const __m128i c0 = _mm_packs_epi32(b0, b1);
|
||||
const __m128i c1 = _mm_packs_epi32(b2, b3);
|
||||
const __m128i d = _mm_packus_epi16(c0, c1);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i + 32 <= length; i += 32) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i c = _mm_packs_epi32(b0, b1);
|
||||
const __m128i d = _mm_packus_epi16(c, c);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Apply alpha value to rows
|
||||
|
||||
@@ -238,7 +293,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
|
||||
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
|
||||
}
|
||||
|
||||
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
@@ -261,7 +316,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
|
||||
if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -273,9 +328,12 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
|
||||
WebPMultARGBRow = MultARGBRow_SSE2;
|
||||
WebPMultRow = MultRow_SSE2;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
WebPDispatchAlpha = DispatchAlpha_SSE2;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE2;
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_SSE2;
|
||||
WebPHasAlpha32b = HasAlpha32b_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
@@ -19,9 +19,9 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@@ -82,7 +82,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
||||
@@ -128,9 +128,9 @@ static WEBP_INLINE void VP8Transpose_2_4x4_16b(
|
||||
// Pack the planar buffers
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
|
||||
static WEBP_INLINE void VP8PlanarTo24b(__m128i* const in0, __m128i* const in1,
|
||||
__m128i* const in2, __m128i* const in3,
|
||||
__m128i* const in4, __m128i* const in5) {
|
||||
static WEBP_INLINE void VP8PlanarTo24b_SSE2(
|
||||
__m128i* const in0, __m128i* const in1, __m128i* const in2,
|
||||
__m128i* const in3, __m128i* const in4, __m128i* const in5) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@@ -159,10 +159,10 @@ static WEBP_INLINE void VP8PlanarTo24b(__m128i* const in0, __m128i* const in1,
|
||||
|
||||
// Convert four packed four-channel buffers like argbargbargbargb... into the
|
||||
// split channels aaaaa ... rrrr ... gggg .... bbbbb ......
|
||||
static WEBP_INLINE void VP8L32bToPlanar(__m128i* const in0,
|
||||
__m128i* const in1,
|
||||
__m128i* const in2,
|
||||
__m128i* const in3) {
|
||||
static WEBP_INLINE void VP8L32bToPlanar_SSE2(__m128i* const in0,
|
||||
__m128i* const in1,
|
||||
__m128i* const in2,
|
||||
__m128i* const in3) {
|
||||
// Column-wise transpose.
|
||||
const __m128i A0 = _mm_unpacklo_epi8(*in0, *in1);
|
||||
const __m128i A1 = _mm_unpackhi_epi8(*in0, *in1);
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
// Copyright 2016 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// SSE4 code common to several files.
|
||||
//
|
||||
// Author: Vincent Rabaud (vrabaud@google.com)
|
||||
|
||||
#ifndef WEBP_DSP_COMMON_SSE41_H_
|
||||
#define WEBP_DSP_COMMON_SSE41_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Channel mixing.
|
||||
// Shuffles the input buffer as A0 0 0 A1 0 0 A2 ...
|
||||
#define WEBP_SSE41_SHUFF(OUT, IN0, IN1) \
|
||||
OUT##0 = _mm_shuffle_epi8(*IN0, shuff0); \
|
||||
OUT##1 = _mm_shuffle_epi8(*IN0, shuff1); \
|
||||
OUT##2 = _mm_shuffle_epi8(*IN0, shuff2); \
|
||||
OUT##3 = _mm_shuffle_epi8(*IN1, shuff0); \
|
||||
OUT##4 = _mm_shuffle_epi8(*IN1, shuff1); \
|
||||
OUT##5 = _mm_shuffle_epi8(*IN1, shuff2);
|
||||
|
||||
// Pack the planar buffers
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
|
||||
static WEBP_INLINE void VP8PlanarTo24b_SSE41(
|
||||
__m128i* const in0, __m128i* const in1, __m128i* const in2,
|
||||
__m128i* const in3, __m128i* const in4, __m128i* const in5) {
|
||||
__m128i R0, R1, R2, R3, R4, R5;
|
||||
__m128i G0, G1, G2, G3, G4, G5;
|
||||
__m128i B0, B1, B2, B3, B4, B5;
|
||||
|
||||
// Process R.
|
||||
{
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
-1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
|
||||
WEBP_SSE41_SHUFF(R, in0, in1)
|
||||
}
|
||||
|
||||
// Process G.
|
||||
{
|
||||
// Same as before, just shifted to the left by one and including the right
|
||||
// padding.
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
-1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
|
||||
WEBP_SSE41_SHUFF(G, in2, in3)
|
||||
}
|
||||
|
||||
// Process B.
|
||||
{
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
|
||||
WEBP_SSE41_SHUFF(B, in4, in5)
|
||||
}
|
||||
|
||||
// OR the different channels.
|
||||
{
|
||||
const __m128i RG0 = _mm_or_si128(R0, G0);
|
||||
const __m128i RG1 = _mm_or_si128(R1, G1);
|
||||
const __m128i RG2 = _mm_or_si128(R2, G2);
|
||||
const __m128i RG3 = _mm_or_si128(R3, G3);
|
||||
const __m128i RG4 = _mm_or_si128(R4, G4);
|
||||
const __m128i RG5 = _mm_or_si128(R5, G5);
|
||||
*in0 = _mm_or_si128(RG0, B0);
|
||||
*in1 = _mm_or_si128(RG1, B1);
|
||||
*in2 = _mm_or_si128(RG2, B2);
|
||||
*in3 = _mm_or_si128(RG3, B3);
|
||||
*in4 = _mm_or_si128(RG4, B4);
|
||||
*in5 = _mm_or_si128(RG5, B5);
|
||||
}
|
||||
}
|
||||
|
||||
#undef WEBP_SSE41_SHUFF
|
||||
|
||||
// Convert four packed four-channel buffers like argbargbargbargb... into the
|
||||
// split channels aaaaa ... rrrr ... gggg .... bbbbb ......
|
||||
static WEBP_INLINE void VP8L32bToPlanar_SSE41(__m128i* const in0,
|
||||
__m128i* const in1,
|
||||
__m128i* const in2,
|
||||
__m128i* const in3) {
|
||||
// aaaarrrrggggbbbb
|
||||
const __m128i shuff0 =
|
||||
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0);
|
||||
const __m128i A0 = _mm_shuffle_epi8(*in0, shuff0);
|
||||
const __m128i A1 = _mm_shuffle_epi8(*in1, shuff0);
|
||||
const __m128i A2 = _mm_shuffle_epi8(*in2, shuff0);
|
||||
const __m128i A3 = _mm_shuffle_epi8(*in3, shuff0);
|
||||
// A0A1R0R1
|
||||
// G0G1B0B1
|
||||
// A2A3R2R3
|
||||
// G0G1B0B1
|
||||
const __m128i B0 = _mm_unpacklo_epi32(A0, A1);
|
||||
const __m128i B1 = _mm_unpackhi_epi32(A0, A1);
|
||||
const __m128i B2 = _mm_unpacklo_epi32(A2, A3);
|
||||
const __m128i B3 = _mm_unpackhi_epi32(A2, A3);
|
||||
*in3 = _mm_unpacklo_epi64(B0, B2);
|
||||
*in2 = _mm_unpackhi_epi64(B0, B2);
|
||||
*in1 = _mm_unpacklo_epi64(B1, B3);
|
||||
*in0 = _mm_unpackhi_epi64(B1, B3);
|
||||
}
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_DSP_COMMON_SSE41_H_
|
||||
+243
-151
@@ -11,7 +11,9 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include <assert.h>
|
||||
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
@@ -25,7 +27,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
#define STORE(x, y, v) \
|
||||
dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
|
||||
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
#define STORE2(y, dc, d, c) do { \
|
||||
const int DC = (dc); \
|
||||
@@ -38,7 +40,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
|
||||
#define MUL2(a) (((a) * 35468) >> 16)
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@@ -78,7 +81,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = MUL2(in[4]);
|
||||
const int d4 = MUL1(in[4]);
|
||||
@@ -93,19 +96,21 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
#undef MUL2
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@@ -114,8 +119,9 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@@ -127,7 +133,8 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Paragraph 14.3
|
||||
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
out += 64;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
@@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
#define DST(x, y) dst[(x) + (y) * BPS]
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t* const clip0 = VP8kclip1 - top[-1];
|
||||
@@ -174,21 +183,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// 16x16
|
||||
|
||||
static void VE16(uint8_t* dst) { // vertical
|
||||
static void VE16_C(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 16);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_C(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
memset(dst, dst[-1], 16);
|
||||
@@ -203,7 +212,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
static void DC16_C(uint8_t* dst) { // DC
|
||||
int DC = 16;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@@ -212,7 +221,7 @@ static void DC16(uint8_t* dst) { // DC
|
||||
Put16(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@@ -221,7 +230,7 @@ static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
|
||||
int DC = 8;
|
||||
int i;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
@@ -230,9 +239,10 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
|
||||
@@ -242,7 +252,8 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE4_C(uint8_t* dst) { // vertical
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
@@ -255,8 +266,9 @@ static void VE4(uint8_t* dst) { // vertical
|
||||
memcpy(dst + i * BPS, vals, sizeof(vals));
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HE4(uint8_t* dst) { // horizontal
|
||||
static void HE4_C(uint8_t* dst) { // horizontal
|
||||
const int A = dst[-1 - BPS];
|
||||
const int B = dst[-1];
|
||||
const int C = dst[-1 + BPS];
|
||||
@@ -268,7 +280,8 @@ static void HE4(uint8_t* dst) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst) { // DC
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void DC4_C(uint8_t* dst) { // DC
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
|
||||
@@ -276,7 +289,7 @@ static void DC4(uint8_t* dst) { // DC
|
||||
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
static void RD4_C(uint8_t* dst) { // Down-right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@@ -295,7 +308,7 @@ static void RD4(uint8_t* dst) { // Down-right
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
static void LD4_C(uint8_t* dst) { // Down-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@@ -312,8 +325,9 @@ static void LD4(uint8_t* dst) { // Down-Left
|
||||
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4_C(uint8_t* dst) { // Vertical-Right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@@ -335,7 +349,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4_C(uint8_t* dst) { // Vertical-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@@ -357,7 +371,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst) { // Horizontal-Up
|
||||
static void HU4_C(uint8_t* dst) { // Horizontal-Up
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@@ -372,7 +386,7 @@ static void HU4(uint8_t* dst) { // Horizontal-Up
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst) { // Horizontal-Down
|
||||
static void HD4_C(uint8_t* dst) { // Horizontal-Down
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@@ -404,14 +418,15 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE8uv_C(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
static void HE8uv_C(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memset(dst, dst[-1], 8);
|
||||
@@ -427,7 +442,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
static void DC8uv_C(uint8_t* dst) { // DC
|
||||
int dc0 = 8;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@@ -436,7 +451,7 @@ static void DC8uv(uint8_t* dst) { // DC
|
||||
Put8x8uv(dc0 >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@@ -445,7 +460,7 @@ static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@@ -454,17 +469,19 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Edge filtering functions
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
// 4 pixels in, 2 pixels out
|
||||
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
|
||||
@@ -474,7 +491,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 4 pixels in, 4 pixels out
|
||||
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0);
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3];
|
||||
@@ -487,7 +504,7 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 6 pixels in, 6 pixels out
|
||||
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
|
||||
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
|
||||
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
|
||||
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
|
||||
@@ -503,18 +520,22 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
|
||||
p[ 2*step] = VP8kclip1[q2 - a3];
|
||||
}
|
||||
|
||||
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
|
||||
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
|
||||
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static WEBP_INLINE int needs_filter2(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
|
||||
const int p0 = p[-step], q0 = p[0];
|
||||
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
|
||||
@@ -523,140 +544,159 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
|
||||
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
|
||||
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (needs_filter(p + i, stride, thresh2)) {
|
||||
do_filter2(p + i, stride);
|
||||
if (NeedsFilter_C(p + i, stride, thresh2)) {
|
||||
DoFilter2_C(p + i, stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (needs_filter(p + i * stride, 1, thresh2)) {
|
||||
do_filter2(p + i * stride, 1);
|
||||
if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
|
||||
DoFilter2_C(p + i * stride, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
SimpleVFilter16_C(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
SimpleHFilter16_C(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
static WEBP_INLINE void FilterLoop26(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
} else {
|
||||
do_filter6(p, hstride);
|
||||
DoFilter6_C(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void FilterLoop24(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
} else {
|
||||
do_filter4(p, hstride);
|
||||
DoFilter4_C(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// on macroblock edges
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
static void VFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
static void HFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@@ -701,62 +741,69 @@ extern void VP8DspInitMIPS32(void);
|
||||
extern void VP8DspInitMIPSdspR2(void);
|
||||
extern void VP8DspInitMSA(void);
|
||||
|
||||
static volatile VP8CPUInfo dec_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&dec_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8DspInit) {
|
||||
VP8InitClipTables();
|
||||
|
||||
VP8TransformWHT = TransformWHT;
|
||||
VP8Transform = TransformTwo;
|
||||
VP8TransformUV = TransformUV;
|
||||
VP8TransformDC = TransformDC;
|
||||
VP8TransformDCUV = TransformDCUV;
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8TransformWHT = TransformWHT_C;
|
||||
VP8Transform = TransformTwo_C;
|
||||
VP8TransformDC = TransformDC_C;
|
||||
VP8TransformAC3 = TransformAC3_C;
|
||||
#endif
|
||||
VP8TransformUV = TransformUV_C;
|
||||
VP8TransformDCUV = TransformDCUV_C;
|
||||
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8VFilter16 = VFilter16_C;
|
||||
VP8VFilter16i = VFilter16i_C;
|
||||
VP8HFilter16 = HFilter16_C;
|
||||
VP8VFilter8 = VFilter8_C;
|
||||
VP8VFilter8i = VFilter8i_C;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_C;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_C;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_C;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_C;
|
||||
#endif
|
||||
|
||||
VP8PredLuma4[0] = DC4;
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[3] = HE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
VP8PredLuma4[8] = HD4;
|
||||
VP8PredLuma4[9] = HU4;
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8HFilter16i = HFilter16i_C;
|
||||
VP8HFilter8 = HFilter8_C;
|
||||
VP8HFilter8i = HFilter8i_C;
|
||||
#endif
|
||||
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma4[0] = DC4_C;
|
||||
VP8PredLuma4[1] = TM4_C;
|
||||
VP8PredLuma4[2] = VE4_C;
|
||||
VP8PredLuma4[4] = RD4_C;
|
||||
VP8PredLuma4[6] = LD4_C;
|
||||
#endif
|
||||
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
VP8PredLuma4[3] = HE4_C;
|
||||
VP8PredLuma4[5] = VR4_C;
|
||||
VP8PredLuma4[7] = VL4_C;
|
||||
VP8PredLuma4[8] = HD4_C;
|
||||
VP8PredLuma4[9] = HU4_C;
|
||||
|
||||
VP8DitherCombine8x8 = DitherCombine8x8;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma16[0] = DC16_C;
|
||||
VP8PredLuma16[1] = TM16_C;
|
||||
VP8PredLuma16[2] = VE16_C;
|
||||
VP8PredLuma16[3] = HE16_C;
|
||||
VP8PredLuma16[4] = DC16NoTop_C;
|
||||
VP8PredLuma16[5] = DC16NoLeft_C;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_C;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv_C;
|
||||
VP8PredChroma8[1] = TM8uv_C;
|
||||
VP8PredChroma8[2] = VE8uv_C;
|
||||
VP8PredChroma8[3] = HE8uv_C;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_C;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_C;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_C;
|
||||
#endif
|
||||
|
||||
VP8DitherCombine8x8 = DitherCombine8x8_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@@ -770,11 +817,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
VP8DspInitMIPS32();
|
||||
@@ -791,5 +833,55 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
dec_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8TransformWHT != NULL);
|
||||
assert(VP8Transform != NULL);
|
||||
assert(VP8TransformDC != NULL);
|
||||
assert(VP8TransformAC3 != NULL);
|
||||
assert(VP8TransformUV != NULL);
|
||||
assert(VP8TransformDCUV != NULL);
|
||||
assert(VP8VFilter16 != NULL);
|
||||
assert(VP8HFilter16 != NULL);
|
||||
assert(VP8VFilter8 != NULL);
|
||||
assert(VP8HFilter8 != NULL);
|
||||
assert(VP8VFilter16i != NULL);
|
||||
assert(VP8HFilter16i != NULL);
|
||||
assert(VP8VFilter8i != NULL);
|
||||
assert(VP8HFilter8i != NULL);
|
||||
assert(VP8SimpleVFilter16 != NULL);
|
||||
assert(VP8SimpleHFilter16 != NULL);
|
||||
assert(VP8SimpleVFilter16i != NULL);
|
||||
assert(VP8SimpleHFilter16i != NULL);
|
||||
assert(VP8PredLuma4[0] != NULL);
|
||||
assert(VP8PredLuma4[1] != NULL);
|
||||
assert(VP8PredLuma4[2] != NULL);
|
||||
assert(VP8PredLuma4[3] != NULL);
|
||||
assert(VP8PredLuma4[4] != NULL);
|
||||
assert(VP8PredLuma4[5] != NULL);
|
||||
assert(VP8PredLuma4[6] != NULL);
|
||||
assert(VP8PredLuma4[7] != NULL);
|
||||
assert(VP8PredLuma4[8] != NULL);
|
||||
assert(VP8PredLuma4[9] != NULL);
|
||||
assert(VP8PredLuma16[0] != NULL);
|
||||
assert(VP8PredLuma16[1] != NULL);
|
||||
assert(VP8PredLuma16[2] != NULL);
|
||||
assert(VP8PredLuma16[3] != NULL);
|
||||
assert(VP8PredLuma16[4] != NULL);
|
||||
assert(VP8PredLuma16[5] != NULL);
|
||||
assert(VP8PredLuma16[6] != NULL);
|
||||
assert(VP8PredChroma8[0] != NULL);
|
||||
assert(VP8PredChroma8[1] != NULL);
|
||||
assert(VP8PredChroma8[2] != NULL);
|
||||
assert(VP8PredChroma8[3] != NULL);
|
||||
assert(VP8PredChroma8[4] != NULL);
|
||||
assert(VP8PredChroma8[5] != NULL);
|
||||
assert(VP8PredChroma8[6] != NULL);
|
||||
assert(VP8DitherCombine8x8 != NULL);
|
||||
}
|
||||
|
||||
@@ -11,11 +11,14 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#define USE_STATIC_TABLES // undefine to have run-time table initialization
|
||||
// define to 0 to have run-time table initialization
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_TABLES
|
||||
#if (USE_STATIC_TABLES == 1)
|
||||
|
||||
static const uint8_t abs0[255 + 255 + 1] = {
|
||||
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
@@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
|
||||
// and make sure it's set to true _last_ (so as to be thread-safe)
|
||||
static volatile int tables_ok = 0;
|
||||
|
||||
#endif
|
||||
#endif // USE_STATIC_TABLES
|
||||
|
||||
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
|
||||
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
|
||||
@@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
|
||||
const uint8_t* const VP8kabs0 = &abs0[255];
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
#if (USE_STATIC_TABLES == 0)
|
||||
int i;
|
||||
if (!tables_ok) {
|
||||
for (i = -255; i <= 255; ++i) {
|
||||
|
||||
+363
-350
File diff suppressed because it is too large
Load Diff
+224
-228
@@ -12,23 +12,25 @@
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
// cduvivier@google.com (Christian Duvivier)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
|
||||
// one it seems => disable it by default. Uncomment the following to enable:
|
||||
// #define USE_TRANSFORM_AC3
|
||||
#if !defined(USE_TRANSFORM_AC3)
|
||||
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "./common_sse2.h"
|
||||
#include "../dsp/common_sse2.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
#define MUL(a, b) (((a) * (b)) >> 16)
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
@@ -248,7 +250,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
_mm_subs_epu8((p), (q)))
|
||||
|
||||
// Shift each byte of "x" by 3 bits while preserving by the sign bit.
|
||||
static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
|
||||
const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
|
||||
@@ -258,8 +260,8 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT2(a, b) { \
|
||||
a = _mm_xor_si128(a, sign_bit); \
|
||||
b = _mm_xor_si128(b, sign_bit); \
|
||||
(a) = _mm_xor_si128(a, sign_bit); \
|
||||
(b) = _mm_xor_si128(b, sign_bit); \
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT4(a, b, c, d) { \
|
||||
@@ -268,11 +270,11 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
}
|
||||
|
||||
// input/output is uint8_t
|
||||
static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i t_1 = MM_ABS(*p1, *p0);
|
||||
const __m128i t_2 = MM_ABS(*q1, *q0);
|
||||
@@ -285,11 +287,11 @@ static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input pixels are int8_t
|
||||
static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
// beware of addition order, for saturation!
|
||||
const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1); // p1 - q1
|
||||
const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0); // q0 - p0
|
||||
@@ -300,15 +302,16 @@ static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input and output are int8_t
|
||||
static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
|
||||
__m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
const __m128i k3 = _mm_set1_epi8(3);
|
||||
const __m128i k4 = _mm_set1_epi8(4);
|
||||
__m128i v3 = _mm_adds_epi8(*fl, k3);
|
||||
__m128i v4 = _mm_adds_epi8(*fl, k4);
|
||||
|
||||
SignedShift8b(&v4); // v4 >> 3
|
||||
SignedShift8b(&v3); // v3 >> 3
|
||||
SignedShift8b_SSE2(&v4); // v4 >> 3
|
||||
SignedShift8b_SSE2(&v3); // v3 >> 3
|
||||
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
|
||||
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
|
||||
}
|
||||
@@ -317,9 +320,9 @@ static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
|
||||
// Update operations:
|
||||
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
|
||||
// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
|
||||
static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
|
||||
const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
|
||||
const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
|
||||
@@ -330,11 +333,11 @@ static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
|
||||
}
|
||||
|
||||
// input pixels are uint8_t
|
||||
static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
const __m128i m_thresh = _mm_set1_epi8(thresh);
|
||||
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
|
||||
const __m128i kFE = _mm_set1_epi8(0xFE);
|
||||
@@ -353,28 +356,29 @@ static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
|
||||
// Edge filtering functions
|
||||
|
||||
// Applies filter on 2 pixels (p0 and q0)
|
||||
static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
__m128i a, mask;
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta)
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
|
||||
const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
|
||||
const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
|
||||
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &mask);
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
|
||||
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
GetBaseDelta(&p1s, p0, q0, &q1s, &a);
|
||||
GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
|
||||
a = _mm_and_si128(a, mask); // mask filter values we don't care about
|
||||
DoSimpleFilter(p0, q0, &a);
|
||||
DoSimpleFilter_SSE2(p0, q0, &a);
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
}
|
||||
|
||||
// Applies filter on 4 pixels (p1, p0, q0 and q1)
|
||||
static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
const __m128i k64 = _mm_set1_epi8(64);
|
||||
@@ -384,7 +388,7 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
__m128i t1, t2, t3;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, ¬_hev);
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, ¬_hev);
|
||||
|
||||
// convert to signed values
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
@@ -399,8 +403,8 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
|
||||
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
|
||||
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
|
||||
SignedShift8b(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
|
||||
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
@@ -417,25 +421,26 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
}
|
||||
|
||||
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
|
||||
static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
__m128i a, not_hev;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, ¬_hev);
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, ¬_hev);
|
||||
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
FLIP_SIGN_BIT2(*p2, *q2);
|
||||
GetBaseDelta(p1, p0, q0, q1, &a);
|
||||
GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
|
||||
|
||||
{ // do simple filter on pixels with hev
|
||||
const __m128i m = _mm_andnot_si128(not_hev, *mask);
|
||||
const __m128i f = _mm_and_si128(a, m);
|
||||
DoSimpleFilter(p0, q0, &f);
|
||||
DoSimpleFilter_SSE2(p0, q0, &f);
|
||||
}
|
||||
|
||||
{ // do strong filter on pixels with not hev
|
||||
@@ -460,15 +465,15 @@ static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
|
||||
const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo); // Filter * 27 + 63
|
||||
const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi); // Filter * 27 + 63
|
||||
|
||||
Update2Pixels(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels(p0, q0, &a0_lo, &a0_hi);
|
||||
Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
|
||||
}
|
||||
}
|
||||
|
||||
// reads 8 rows across a vertical edge.
|
||||
static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
|
||||
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
|
||||
const __m128i A0 = _mm_set_epi32(
|
||||
@@ -494,11 +499,11 @@ static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
|
||||
*q = _mm_unpackhi_epi32(C0, C1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
// Assume the pixels around the edge (|) are numbered as follows
|
||||
// 00 01 | 02 03
|
||||
// 10 11 | 12 13
|
||||
@@ -514,8 +519,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
// q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
|
||||
// p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
|
||||
// q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
|
||||
Load8x4(r0, stride, p1, q0);
|
||||
Load8x4(r8, stride, p0, q1);
|
||||
Load8x4_SSE2(r0, stride, p1, q0);
|
||||
Load8x4_SSE2(r8, stride, p0, q1);
|
||||
|
||||
{
|
||||
// p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
|
||||
@@ -531,7 +536,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
|
||||
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
|
||||
uint8_t* dst, int stride) {
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i, dst += stride) {
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
|
||||
@@ -540,12 +546,12 @@ static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
|
||||
}
|
||||
|
||||
// Transpose back and store
|
||||
static WEBP_INLINE void Store16x4(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
__m128i t1, p1_s, p0_s, q0_s, q1_s;
|
||||
|
||||
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
|
||||
@@ -572,55 +578,55 @@ static WEBP_INLINE void Store16x4(const __m128i* const p1,
|
||||
p1_s = _mm_unpacklo_epi16(t1, q1_s);
|
||||
q1_s = _mm_unpackhi_epi16(t1, q1_s);
|
||||
|
||||
Store4x4(&p0_s, r0, stride);
|
||||
Store4x4_SSE2(&p0_s, r0, stride);
|
||||
r0 += 4 * stride;
|
||||
Store4x4(&q0_s, r0, stride);
|
||||
Store4x4_SSE2(&q0_s, r0, stride);
|
||||
|
||||
Store4x4(&p1_s, r8, stride);
|
||||
Store4x4_SSE2(&p1_s, r8, stride);
|
||||
r8 += 4 * stride;
|
||||
Store4x4(&q1_s, r8, stride);
|
||||
Store4x4_SSE2(&q1_s, r8, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
// Load
|
||||
__m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
|
||||
__m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
|
||||
__m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
|
||||
__m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
|
||||
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-stride], p0);
|
||||
_mm_storeu_si128((__m128i*)&p[0], q0);
|
||||
}
|
||||
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
__m128i p1, p0, q0, q1;
|
||||
|
||||
p -= 2; // beginning of p1
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
SimpleVFilter16_SSE2(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
SimpleHFilter16_SSE2(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,60 +634,60 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
#define MAX_DIFF1(p3, p2, p1, p0, m) do { \
|
||||
m = MM_ABS(p1, p0); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
(m) = MM_ABS(p1, p0); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define MAX_DIFF2(p3, p2, p1, p0, m) do { \
|
||||
m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) { \
|
||||
e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]); \
|
||||
e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]); \
|
||||
e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]); \
|
||||
e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]); \
|
||||
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
|
||||
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
|
||||
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
|
||||
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
|
||||
}
|
||||
|
||||
#define LOADUV_H_EDGE(p, u, v, stride) do { \
|
||||
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
|
||||
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
|
||||
p = _mm_unpacklo_epi64(U, V); \
|
||||
(p) = _mm_unpacklo_epi64(U, V); \
|
||||
} while (0)
|
||||
|
||||
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) { \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * stride); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * stride); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * stride); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * stride); \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
|
||||
}
|
||||
|
||||
#define STOREUV(p, u, v, stride) { \
|
||||
_mm_storel_epi64((__m128i*)&u[(stride)], p); \
|
||||
p = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&v[(stride)], p); \
|
||||
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
|
||||
(p) = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ComplexMask(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
const __m128i it = _mm_set1_epi8(ithresh);
|
||||
const __m128i diff = _mm_subs_epu8(*mask, it);
|
||||
const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
|
||||
__m128i filter_mask;
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
*mask = _mm_and_si128(thresh_mask, filter_mask);
|
||||
}
|
||||
|
||||
// on macroblock edges
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i t1;
|
||||
__m128i mask;
|
||||
__m128i p2, p1, p0, q0, q1, q2;
|
||||
@@ -694,8 +700,8 @@ static void VFilter16(uint8_t* p, int stride,
|
||||
LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
|
||||
@@ -706,28 +712,28 @@ static void VFilter16(uint8_t* p, int stride,
|
||||
_mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
|
||||
}
|
||||
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const b = p - 4;
|
||||
Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
@@ -744,8 +750,8 @@ static void VFilter16i(uint8_t* p, int stride,
|
||||
|
||||
// p3 and p2 are not just temporary variables here: they will be
|
||||
// re-used for next span. And q2/q3 will become p1/p0 accordingly.
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&b[0 * stride], p1);
|
||||
@@ -759,12 +765,12 @@ static void VFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
|
||||
for (k = 3; k > 0; --k) {
|
||||
__m128i mask, tmp1, tmp2;
|
||||
@@ -773,13 +779,13 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
p += 4; // beginning of q0 (and next span)
|
||||
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
|
||||
// rotate samples
|
||||
p1 = tmp1;
|
||||
@@ -788,8 +794,8 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@@ -801,8 +807,8 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p2, u, v, -3 * stride);
|
||||
@@ -813,28 +819,28 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const tu = u - 4;
|
||||
uint8_t* const tv = v - 4;
|
||||
Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
||||
@@ -849,8 +855,8 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p1, u, v, -2 * stride);
|
||||
@@ -859,24 +865,24 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
MAX_DIFF1(t2, t1, p1, p0, mask);
|
||||
|
||||
u += 4; // beginning of q0
|
||||
v += 4;
|
||||
Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
u -= 2; // beginning of p1
|
||||
v -= 2;
|
||||
Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -893,7 +899,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
static void VE4_SSE2(uint8_t* dst) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@@ -909,7 +915,7 @@ static void VE4(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
static void LD4_SSE2(uint8_t* dst) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@@ -925,7 +931,7 @@ static void LD4(uint8_t* dst) { // Down-Left
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
@@ -950,7 +956,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@@ -975,7 +981,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
|
||||
@@ -1004,7 +1010,7 @@ static void RD4(uint8_t* dst) { // Down-right
|
||||
//------------------------------------------------------------------------------
|
||||
// Luma 16x16
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
@@ -1041,11 +1047,11 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
|
||||
static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
|
||||
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
|
||||
|
||||
static void VE16(uint8_t* dst) {
|
||||
static void VE16_SSE2(uint8_t* dst) {
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@@ -1053,7 +1059,7 @@ static void VE16(uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
@@ -1062,7 +1068,7 @@ static void HE16(uint8_t* dst) { // horizontal
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@@ -1070,7 +1076,7 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
static void DC16_SSE2(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
@@ -1083,37 +1089,37 @@ static void DC16(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 16;
|
||||
Put16(DC >> 5, dst);
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
static void DC16NoTop_SSE2(uint8_t* dst) { // DC with top samples unavailable
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
DC += dst[-1 + j * BPS];
|
||||
}
|
||||
Put16(DC >> 4, dst);
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
static void DC16NoLeft_SSE2(uint8_t* dst) { // DC with left samples unavailable
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
|
||||
const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 8;
|
||||
Put16(DC >> 4, dst);
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
static void DC16NoTopLeft_SSE2(uint8_t* dst) { // DC with no top & left samples
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@@ -1121,17 +1127,8 @@ static void VE8uv(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
_mm_storel_epi64((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@@ -1139,7 +1136,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
static void DC8uv_SSE2(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
@@ -1150,29 +1147,29 @@ static void DC8uv(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 8;
|
||||
Put8x8uv(DC >> 4, dst);
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 4;
|
||||
Put8x8uv(DC >> 3, dst);
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop_SSE2(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[-1 + i * BPS];
|
||||
}
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
Put8x8uv_SSE2(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -1181,47 +1178,46 @@ static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
|
||||
VP8Transform = Transform;
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
VP8Transform = Transform_SSE2;
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
VP8TransformAC3 = TransformAC3_SSE2;
|
||||
#endif
|
||||
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
VP8VFilter16 = VFilter16_SSE2;
|
||||
VP8HFilter16 = HFilter16_SSE2;
|
||||
VP8VFilter8 = VFilter8_SSE2;
|
||||
VP8HFilter8 = HFilter8_SSE2;
|
||||
VP8VFilter16i = VFilter16i_SSE2;
|
||||
VP8HFilter16i = HFilter16i_SSE2;
|
||||
VP8VFilter8i = VFilter8i_SSE2;
|
||||
VP8HFilter8i = HFilter8i_SSE2;
|
||||
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
|
||||
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
VP8PredLuma4[1] = TM4_SSE2;
|
||||
VP8PredLuma4[2] = VE4_SSE2;
|
||||
VP8PredLuma4[4] = RD4_SSE2;
|
||||
VP8PredLuma4[5] = VR4_SSE2;
|
||||
VP8PredLuma4[6] = LD4_SSE2;
|
||||
VP8PredLuma4[7] = VL4_SSE2;
|
||||
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
VP8PredLuma16[0] = DC16_SSE2;
|
||||
VP8PredLuma16[1] = TM16_SSE2;
|
||||
VP8PredLuma16[2] = VE16_SSE2;
|
||||
VP8PredLuma16[3] = HE16_SSE2;
|
||||
VP8PredLuma16[4] = DC16NoTop_SSE2;
|
||||
VP8PredLuma16[5] = DC16NoLeft_SSE2;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
VP8PredChroma8[0] = DC8uv_SSE2;
|
||||
VP8PredChroma8[1] = TM8uv_SSE2;
|
||||
VP8PredChroma8[2] = VE8uv_SSE2;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_SSE2;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_SSE41(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
const __m128i kShuffle3 = _mm_set1_epi8(3);
|
||||
for (j = 16; j > 0; --j) {
|
||||
@@ -36,7 +36,7 @@ static void HE16(uint8_t* dst) { // horizontal
|
||||
extern void VP8DspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[3] = HE16_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
||||
+118
-34
@@ -38,10 +38,22 @@ extern "C" {
|
||||
# define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_CLANG_VERSION 0
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
// for now, none of the optimizations below are available in emscripten
|
||||
#if !defined(EMSCRIPTEN)
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
@@ -64,22 +76,20 @@ extern "C" {
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
|
||||
#define WEBP_USE_AVX2
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
|
||||
#define WEBP_ANDROID_NEON // Android targets that might support NEON
|
||||
#endif
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
|
||||
#if (defined(__ARM_NEON__) || \
|
||||
defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
@@ -90,7 +100,7 @@ extern "C" {
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
@@ -100,6 +110,24 @@ extern "C" {
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#endif /* EMSCRIPTEN */
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
@@ -109,6 +137,42 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func ## _lock)) break; \
|
||||
if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func ## _lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
|
||||
WEBP_DSP_INIT(name ## _body); \
|
||||
} \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
@@ -129,6 +193,18 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
@@ -143,7 +219,7 @@ typedef enum {
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
@@ -152,7 +228,7 @@ WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
|
||||
// avoiding a compiler warning.
|
||||
#define WEBP_DSP_INIT_STUB(func) \
|
||||
extern void func(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void func(void) {}
|
||||
void func(void) {}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Encoding
|
||||
@@ -271,6 +347,7 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
|
||||
int xo, int yo, // center position
|
||||
int W, int H); // plane dimension
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
// This version is called with the guarantee that you can load 8 bytes and
|
||||
// 8 rows at offset src1 and src2
|
||||
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
@@ -278,10 +355,13 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
|
||||
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
|
||||
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_DISABLE_STATS)
|
||||
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
|
||||
const uint8_t* src2, int len);
|
||||
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||
#endif
|
||||
|
||||
// must be called before using any of the above directly
|
||||
void VP8SSIMDspInit(void);
|
||||
@@ -462,12 +542,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
@@ -533,25 +613,29 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
int width, int num_rows, int inverse);
|
||||
|
||||
// Plain-C versions, used as fallback by some implementations.
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
#endif
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// This function returns true if src[i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
// This function returns true if src[4*i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
|
||||
// To be called first before using the above.
|
||||
void WebPInitAlphaProcessing(void);
|
||||
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// To be called first before using the above.
|
||||
void VP8EncDspARGBInit(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Filter functions
|
||||
|
||||
@@ -591,4 +675,4 @@ void VP8FiltersInit(void);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DSP_DSP_H_ */
|
||||
#endif // WEBP_DSP_DSP_H_
|
||||
|
||||
+108
-94
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -20,16 +20,17 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
int i;
|
||||
if (inverse) {
|
||||
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
|
||||
@@ -41,7 +42,44 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
@@ -53,48 +91,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -105,26 +106,28 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLine(in, preds, out, width, inverse);
|
||||
PredictLine_C(in, preds, out, width, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@@ -136,7 +139,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@@ -147,11 +150,11 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
int w;
|
||||
// leftmost pixel: predict from above.
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
for (w = 1; w < width; ++w) {
|
||||
const int pred = GradientPredictor(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
const int pred = GradientPredictor_C(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
out[w] = in[w] + (inverse ? pred : -pred);
|
||||
}
|
||||
++row;
|
||||
@@ -160,32 +163,34 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef SANITY_CHECK
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t pred = (prev == NULL) ? 0 : prev[0];
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
@@ -194,26 +199,28 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
} else {
|
||||
uint8_t top = prev[0], top_left = top, left = top;
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
top = prev[i]; // need to read this first, in case prev==out
|
||||
left = in[i] + GradientPredictor(left, top, top_left);
|
||||
left = in[i] + GradientPredictor_C(left, top, top_left);
|
||||
top_left = top;
|
||||
out[i] = left;
|
||||
}
|
||||
@@ -231,21 +238,20 @@ extern void VP8FiltersInitMSA(void);
|
||||
extern void VP8FiltersInitNEON(void);
|
||||
extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
static volatile VP8CPUInfo filters_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&filters_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
|
||||
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
|
||||
#endif
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
|
||||
|
||||
WebPFilters[WEBP_FILTER_NONE] = NULL;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@@ -253,11 +259,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
VP8FiltersInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8FiltersInitMIPSdspR2();
|
||||
@@ -269,5 +270,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
filters_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
@@ -24,16 +24,16 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@@ -51,7 +51,7 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@@ -71,10 +71,11 @@ static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@@ -84,7 +85,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -94,7 +95,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -104,9 +105,10 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@@ -117,7 +119,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -125,7 +127,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLineTop(in, in - stride, out, width);
|
||||
PredictLineTop_SSE2(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -135,14 +137,14 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
static void GradientPredictDirect(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@@ -161,14 +163,14 @@ static void GradientPredictDirect(const uint8_t* const row,
|
||||
_mm_storel_epi64((__m128i*)(out + i), H);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@@ -178,7 +180,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -187,7 +189,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
out[0] = in[0] - in[-stride];
|
||||
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@@ -198,26 +200,27 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Inverse transforms
|
||||
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
int i;
|
||||
__m128i last;
|
||||
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
|
||||
@@ -238,10 +241,10 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
|
||||
}
|
||||
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
const int max_pos = width & ~31;
|
||||
@@ -260,9 +263,9 @@ static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientPredictInverse(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
static void GradientPredictInverse_SSE2(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
if (length > 0) {
|
||||
int i;
|
||||
const int max_pos = length & ~7;
|
||||
@@ -293,18 +296,18 @@ static void GradientPredictInverse(const uint8_t* const in,
|
||||
_mm_storel_epi64((__m128i*)&row[i], out);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
} else {
|
||||
out[0] = in[0] + prev[0]; // predict from above
|
||||
GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
|
||||
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,13 +317,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
+99
-105
@@ -13,16 +13,15 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../utils/endian_inl_utils.h"
|
||||
#include "./lossless.h"
|
||||
#include "./lossless_common.h"
|
||||
|
||||
#define MAX_DIFF_COST (1e30f)
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/lossless_common.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Image transforms.
|
||||
@@ -80,8 +79,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
||||
return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
|
||||
// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
|
||||
#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
|
||||
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
|
||||
// inlined.
|
||||
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
|
||||
# define LOCAL_INLINE __attribute__ ((noinline))
|
||||
#else
|
||||
# define LOCAL_INLINE WEBP_INLINE
|
||||
@@ -107,69 +107,69 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)top;
|
||||
return left;
|
||||
}
|
||||
static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[0];
|
||||
}
|
||||
static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[1];
|
||||
}
|
||||
static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[-1];
|
||||
}
|
||||
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
|
||||
static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
@@ -177,29 +177,29 @@ static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
|
||||
}
|
||||
(void)upper;
|
||||
}
|
||||
GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
|
||||
GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
|
||||
GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
|
||||
GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
|
||||
GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
|
||||
GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
|
||||
GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
|
||||
GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
|
||||
GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
|
||||
GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
|
||||
GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
|
||||
GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
|
||||
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Inverse prediction.
|
||||
static void PredictorInverseTransform(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* in, uint32_t* out) {
|
||||
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* in, uint32_t* out) {
|
||||
const int width = transform->xsize_;
|
||||
if (y_start == 0) { // First Row follows the L (mode=1) mode.
|
||||
PredictorAdd0(in, NULL, 1, out);
|
||||
PredictorAdd1(in + 1, NULL, width - 1, out + 1);
|
||||
PredictorAdd0_C(in, NULL, 1, out);
|
||||
PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
|
||||
in += width;
|
||||
out += width;
|
||||
++y_start;
|
||||
@@ -217,7 +217,7 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
|
||||
const uint32_t* pred_mode_src = pred_mode_base;
|
||||
int x = 1;
|
||||
// First pixel follows the T (mode=2) mode.
|
||||
PredictorAdd2(in, out - width, 1, out);
|
||||
PredictorAdd2_C(in, out - width, 1, out);
|
||||
// .. the rest:
|
||||
while (x < width) {
|
||||
const VP8LPredictorAddSubFunc pred_func =
|
||||
@@ -272,8 +272,8 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t argb = src[i];
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
int new_red = red;
|
||||
int new_blue = argb;
|
||||
int new_red = red & 0xff;
|
||||
int new_blue = argb & 0xff;
|
||||
new_red += ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
@@ -284,9 +284,9 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
}
|
||||
|
||||
// Color space inverse transform.
|
||||
static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* src, uint32_t* dst) {
|
||||
static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* src, uint32_t* dst) {
|
||||
const int width = transform->xsize_;
|
||||
const int tile_width = 1 << transform->bits_;
|
||||
const int mask = tile_width - 1;
|
||||
@@ -362,10 +362,10 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \
|
||||
} \
|
||||
}
|
||||
|
||||
COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
|
||||
VP8GetARGBIndex, VP8GetARGBValue)
|
||||
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
|
||||
8b, VP8GetAlphaIndex, VP8GetAlphaValue)
|
||||
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
|
||||
uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
|
||||
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
|
||||
uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
|
||||
|
||||
#undef COLOR_INDEX_INVERSE
|
||||
|
||||
@@ -380,7 +380,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
|
||||
break;
|
||||
case PREDICTOR_TRANSFORM:
|
||||
PredictorInverseTransform(transform, row_start, row_end, in, out);
|
||||
PredictorInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
if (row_end != transform->ysize_) {
|
||||
// The last predicted row in this iteration will be the top-pred row
|
||||
// for the first row in next iteration.
|
||||
@@ -389,7 +389,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
}
|
||||
break;
|
||||
case CROSS_COLOR_TRANSFORM:
|
||||
ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
|
||||
ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
break;
|
||||
case COLOR_INDEXING_TRANSFORM:
|
||||
if (in == out && transform->bits_ > 0) {
|
||||
@@ -403,9 +403,9 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
VP8LSubSampleSize(transform->xsize_, transform->bits_);
|
||||
uint32_t* const src = out + out_stride - in_stride;
|
||||
memmove(src, out, in_stride * sizeof(*src));
|
||||
ColorIndexInverseTransform(transform, row_start, row_end, src, out);
|
||||
ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
|
||||
} else {
|
||||
ColorIndexInverseTransform(transform, row_start, row_end, in, out);
|
||||
ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -452,7 +452,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
const uint32_t argb = *src++;
|
||||
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
|
||||
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
*dst++ = ba;
|
||||
*dst++ = rg;
|
||||
#else
|
||||
@@ -469,7 +469,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
const uint32_t argb = *src++;
|
||||
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
|
||||
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
*dst++ = gb;
|
||||
*dst++ = rg;
|
||||
#else
|
||||
@@ -496,22 +496,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN)
|
||||
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
|
||||
WebPUint32ToMem(dst, BSwap32(argb));
|
||||
#else // WEBP_REFERENCE_IMPLEMENTATION
|
||||
dst[0] = (argb >> 24) & 0xff;
|
||||
dst[1] = (argb >> 16) & 0xff;
|
||||
dst[2] = (argb >> 8) & 0xff;
|
||||
dst[3] = (argb >> 0) & 0xff;
|
||||
#endif
|
||||
#else // WORDS_BIGENDIAN
|
||||
dst[0] = (argb >> 0) & 0xff;
|
||||
dst[1] = (argb >> 8) & 0xff;
|
||||
dst[2] = (argb >> 16) & 0xff;
|
||||
dst[3] = (argb >> 24) & 0xff;
|
||||
#endif
|
||||
dst += sizeof(argb);
|
||||
}
|
||||
} else {
|
||||
@@ -590,48 +575,46 @@ extern void VP8LDspInitNEON(void);
|
||||
extern void VP8LDspInitMIPSdspR2(void);
|
||||
extern void VP8LDspInitMSA(void);
|
||||
|
||||
static volatile VP8CPUInfo lossless_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&lossless_last_cpuinfo_used;
|
||||
|
||||
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
|
||||
(OUT)[0] = IN##0; \
|
||||
(OUT)[1] = IN##1; \
|
||||
(OUT)[2] = IN##2; \
|
||||
(OUT)[3] = IN##3; \
|
||||
(OUT)[4] = IN##4; \
|
||||
(OUT)[5] = IN##5; \
|
||||
(OUT)[6] = IN##6; \
|
||||
(OUT)[7] = IN##7; \
|
||||
(OUT)[8] = IN##8; \
|
||||
(OUT)[9] = IN##9; \
|
||||
(OUT)[10] = IN##10; \
|
||||
(OUT)[11] = IN##11; \
|
||||
(OUT)[12] = IN##12; \
|
||||
(OUT)[13] = IN##13; \
|
||||
(OUT)[14] = IN##0; /* <- padding security sentinels*/ \
|
||||
(OUT)[15] = IN##0; \
|
||||
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
|
||||
(OUT)[0] = IN##0_C; \
|
||||
(OUT)[1] = IN##1_C; \
|
||||
(OUT)[2] = IN##2_C; \
|
||||
(OUT)[3] = IN##3_C; \
|
||||
(OUT)[4] = IN##4_C; \
|
||||
(OUT)[5] = IN##5_C; \
|
||||
(OUT)[6] = IN##6_C; \
|
||||
(OUT)[7] = IN##7_C; \
|
||||
(OUT)[8] = IN##8_C; \
|
||||
(OUT)[9] = IN##9_C; \
|
||||
(OUT)[10] = IN##10_C; \
|
||||
(OUT)[11] = IN##11_C; \
|
||||
(OUT)[12] = IN##12_C; \
|
||||
(OUT)[13] = IN##13_C; \
|
||||
(OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
|
||||
(OUT)[15] = IN##0_C; \
|
||||
} while (0);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8LDspInit) {
|
||||
COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
|
||||
COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
|
||||
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
|
||||
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
|
||||
|
||||
VP8LTransformColorInverse = VP8LTransformColorInverse_C;
|
||||
|
||||
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
||||
VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
|
||||
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||
#endif
|
||||
|
||||
VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
|
||||
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
|
||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||
|
||||
VP8LMapColor32b = MapARGB;
|
||||
VP8LMapColor8b = MapAlpha;
|
||||
VP8LMapColor32b = MapARGB_C;
|
||||
VP8LMapColor8b = MapAlpha_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@@ -640,11 +623,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
VP8LDspInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8LDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8LDspInitMIPSdspR2();
|
||||
@@ -656,7 +634,23 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
lossless_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8LDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8LAddGreenToBlueAndRed != NULL);
|
||||
assert(VP8LTransformColorInverse != NULL);
|
||||
assert(VP8LConvertBGRAToRGBA != NULL);
|
||||
assert(VP8LConvertBGRAToRGB != NULL);
|
||||
assert(VP8LConvertBGRAToBGR != NULL);
|
||||
assert(VP8LConvertBGRAToRGBA4444 != NULL);
|
||||
assert(VP8LConvertBGRAToRGB565 != NULL);
|
||||
assert(VP8LMapColor32b != NULL);
|
||||
assert(VP8LMapColor8b != NULL);
|
||||
}
|
||||
#undef COPY_PREDICTOR_ARRAY
|
||||
|
||||
|
||||
@@ -25,10 +25,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef WEBP_EXPERIMENTAL_FEATURES
|
||||
#include "../enc/delta_palettization_enc.h"
|
||||
#endif // WEBP_EXPERIMENTAL_FEATURES
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
@@ -124,7 +120,7 @@ void VP8LDspInit(void);
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* const dst, int num_pixels);
|
||||
uint32_t* dst, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
@@ -167,7 +163,7 @@ extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
|
||||
typedef struct { // small struct to hold counters
|
||||
int counts[2]; // index: 0=zero steak, 1=non-zero streak
|
||||
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
||||
int streaks[2][2]; // [zero/non-zero][streak<3 / streak>=3]
|
||||
} VP8LStreaks;
|
||||
|
||||
@@ -198,10 +194,14 @@ extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy);
|
||||
|
||||
typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
extern VP8LHistogramAddFunc VP8LHistogramAdd;
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
|
||||
uint32_t* out, int size);
|
||||
extern VP8LAddVectorFunc VP8LAddVector;
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
|
||||
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
@@ -93,14 +93,6 @@ static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
|
||||
const int log_floor = BitsLog2Floor(n);
|
||||
if (n == (n & ~(n - 1))) { // zero or a power of two.
|
||||
return log_floor;
|
||||
}
|
||||
return log_floor + 1;
|
||||
}
|
||||
|
||||
// Splitting of distance and length codes into prefixes and
|
||||
// extra bits. The prefixes are encoded with an entropy code
|
||||
// while the extra bits are stored just as normal bits.
|
||||
|
||||
@@ -11,14 +11,14 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./lossless.h"
|
||||
#include "./neon.h"
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/neon.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Colorspace conversion functions
|
||||
@@ -26,8 +26,8 @@
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
|
||||
// gcc-4.8.x at least.
|
||||
static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@@ -53,8 +53,8 @@ static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@@ -71,8 +71,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
|
||||
static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
|
||||
|
||||
static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~1);
|
||||
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
|
||||
for (; src < end; src += 2) {
|
||||
@@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
|
||||
{ 21, 22, 24, 25, 26, 28, 29, 30 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
|
||||
@@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
|
||||
{ 21, 20, 26, 25, 24, 30, 29, 28 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
|
||||
@@ -139,7 +139,6 @@ static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictor Transform
|
||||
|
||||
@@ -506,8 +505,8 @@ static const uint8_t kGreenShuffle[16] = {
|
||||
1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
|
||||
};
|
||||
|
||||
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||
const uint8x16_t shuffle) {
|
||||
static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
|
||||
const uint8x16_t shuffle) {
|
||||
return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
|
||||
vtbl1q_u8(argb, vget_high_u8(shuffle)));
|
||||
}
|
||||
@@ -515,15 +514,15 @@ static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||
// 255 = byte will be zeroed
|
||||
static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
|
||||
|
||||
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||
const uint8x8_t shuffle) {
|
||||
static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
|
||||
const uint8x8_t shuffle) {
|
||||
return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
|
||||
vtbl1_u8(vget_high_u8(argb), shuffle));
|
||||
}
|
||||
#endif // USE_VTBLQ
|
||||
|
||||
static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
static void AddGreenToBlueAndRed_NEON(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~3);
|
||||
#ifdef USE_VTBLQ
|
||||
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
|
||||
@@ -532,7 +531,7 @@ static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
|
||||
#endif
|
||||
for (; src < end; src += 4, dst += 4) {
|
||||
const uint8x16_t argb = vld1q_u8((const uint8_t*)src);
|
||||
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
|
||||
const uint8x16_t greens = DoGreenShuffle_NEON(argb, shuffle);
|
||||
vst1q_u8((uint8_t*)dst, vaddq_u8(argb, greens));
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
@@ -542,9 +541,9 @@ static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
static void TransformColorInverse_NEON(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src,
|
||||
int num_pixels, uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
|
||||
const int16_t rb[8] = {
|
||||
@@ -575,7 +574,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const uint8x16_t in = vld1q_u8((const uint8_t*)(src + i));
|
||||
const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);
|
||||
// 0 g 0 g
|
||||
const uint8x16_t greens = DoGreenShuffle(in, shuffle);
|
||||
const uint8x16_t greens = DoGreenShuffle_NEON(in, shuffle);
|
||||
// x dr x db1
|
||||
const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
|
||||
// x r' x b'
|
||||
@@ -627,12 +626,12 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
|
||||
VP8LPredictorsAdd[12] = PredictorAdd12_NEON;
|
||||
VP8LPredictorsAdd[13] = PredictorAdd13_NEON;
|
||||
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_NEON;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_NEON;
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_NEON;
|
||||
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
|
||||
VP8LTransformColorInverse = TransformColorInverse;
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_NEON;
|
||||
VP8LTransformColorInverse = TransformColorInverse_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
+160
-130
@@ -11,21 +11,22 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include "./common_sse2.h"
|
||||
#include "./lossless.h"
|
||||
#include "./lossless_common.h"
|
||||
#include "../dsp/common_sse2.h"
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/lossless_common.h"
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictor Transform
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
||||
uint32_t c2) {
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
|
||||
uint32_t c1,
|
||||
uint32_t c2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
|
||||
@@ -37,8 +38,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
||||
return output;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
||||
uint32_t c2) {
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
|
||||
uint32_t c1,
|
||||
uint32_t c2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
|
||||
@@ -55,7 +57,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
||||
return output;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
|
||||
int pa_minus_pb;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A0 = _mm_cvtsi32_si128(a);
|
||||
@@ -88,8 +90,9 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
|
||||
*avg = _mm_sub_epi8(avg1, one);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Average2_uint32(const uint32_t a0, const uint32_t a1,
|
||||
__m128i* const avg) {
|
||||
static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
|
||||
const uint32_t a1,
|
||||
__m128i* const avg) {
|
||||
// (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
|
||||
const __m128i ones = _mm_set1_epi8(1);
|
||||
const __m128i A0 = _mm_cvtsi32_si128(a0);
|
||||
@@ -99,7 +102,7 @@ static WEBP_INLINE void Average2_uint32(const uint32_t a0, const uint32_t a1,
|
||||
*avg = _mm_sub_epi8(avg1, one);
|
||||
}
|
||||
|
||||
static WEBP_INLINE __m128i Average2_uint32_16(uint32_t a0, uint32_t a1) {
|
||||
static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
|
||||
@@ -107,15 +110,16 @@ static WEBP_INLINE __m128i Average2_uint32_16(uint32_t a0, uint32_t a1) {
|
||||
return _mm_srli_epi16(sum, 1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
|
||||
static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
|
||||
__m128i output;
|
||||
Average2_uint32(a0, a1, &output);
|
||||
Average2_uint32_SSE2(a0, a1, &output);
|
||||
return _mm_cvtsi128_si32(output);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
|
||||
static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
|
||||
uint32_t a2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i avg1 = Average2_uint32_16(a0, a2);
|
||||
const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
|
||||
const __m128i sum = _mm_add_epi16(avg1, A1);
|
||||
const __m128i avg2 = _mm_srli_epi16(sum, 1);
|
||||
@@ -124,10 +128,10 @@ static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
|
||||
return output;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
|
||||
uint32_t a2, uint32_t a3) {
|
||||
const __m128i avg1 = Average2_uint32_16(a0, a1);
|
||||
const __m128i avg2 = Average2_uint32_16(a2, a3);
|
||||
static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
|
||||
uint32_t a2, uint32_t a3) {
|
||||
const __m128i avg1 = Average2_uint32_16_SSE2(a0, a1);
|
||||
const __m128i avg2 = Average2_uint32_16_SSE2(a2, a3);
|
||||
const __m128i sum = _mm_add_epi16(avg2, avg1);
|
||||
const __m128i avg3 = _mm_srli_epi16(sum, 1);
|
||||
const __m128i A0 = _mm_packus_epi16(avg3, avg3);
|
||||
@@ -136,41 +140,41 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(left, top[0], top[1]);
|
||||
const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[-1]);
|
||||
const uint32_t pred = Average2_SSE2(left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[0]);
|
||||
const uint32_t pred = Average2_SSE2(left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[-1], top[0]);
|
||||
const uint32_t pred = Average2_SSE2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[0], top[1]);
|
||||
const uint32_t pred = Average2_SSE2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
||||
const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], left, top[-1]);
|
||||
const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
@@ -272,9 +276,24 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
#undef GENERATE_PREDICTOR_2
|
||||
|
||||
// Predictor10: average of (average of (L,TL), average of (T, TR)).
|
||||
#define DO_PRED10(OUT) do { \
|
||||
__m128i avgLTL, avg; \
|
||||
Average2_m128i(&L, &TL, &avgLTL); \
|
||||
Average2_m128i(&avgTTR, &avgLTL, &avg); \
|
||||
L = _mm_add_epi8(avg, src); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED10_SHIFT do { \
|
||||
/* Rotate the pre-computed values for the next iteration.*/ \
|
||||
avgTTR = _mm_srli_si128(avgTTR, 4); \
|
||||
TL = _mm_srli_si128(TL, 4); \
|
||||
src = _mm_srli_si128(src, 4); \
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i, j;
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
__m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
@@ -283,79 +302,90 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
|
||||
__m128i avgTTR;
|
||||
Average2_m128i(&T, &TR, &avgTTR);
|
||||
for (j = 0; j < 4; ++j) {
|
||||
__m128i avgLTL, avg;
|
||||
Average2_m128i(&L, &TL, &avgLTL);
|
||||
Average2_m128i(&avgTTR, &avgLTL, &avg);
|
||||
L = _mm_add_epi8(avg, src);
|
||||
out[i + j] = _mm_cvtsi128_si32(L);
|
||||
// Rotate the pre-computed values for the next iteration.
|
||||
avgTTR = _mm_srli_si128(avgTTR, 4);
|
||||
TL = _mm_srli_si128(TL, 4);
|
||||
src = _mm_srli_si128(src, 4);
|
||||
}
|
||||
DO_PRED10(0);
|
||||
DO_PRED10_SHIFT;
|
||||
DO_PRED10(1);
|
||||
DO_PRED10_SHIFT;
|
||||
DO_PRED10(2);
|
||||
DO_PRED10_SHIFT;
|
||||
DO_PRED10(3);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED10
|
||||
#undef DO_PRED10_SHIFT
|
||||
|
||||
// Predictor11: select.
|
||||
static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B,
|
||||
__m128i* const out) {
|
||||
// We can unpack with any value on the upper 32 bits, provided it's the same
|
||||
// on both operands (to that their sum of abs diff is zero). Here we use *A.
|
||||
const __m128i A_lo = _mm_unpacklo_epi32(*A, *A);
|
||||
const __m128i B_lo = _mm_unpacklo_epi32(*B, *A);
|
||||
const __m128i A_hi = _mm_unpackhi_epi32(*A, *A);
|
||||
const __m128i B_hi = _mm_unpackhi_epi32(*B, *A);
|
||||
const __m128i s_lo = _mm_sad_epu8(A_lo, B_lo);
|
||||
const __m128i s_hi = _mm_sad_epu8(A_hi, B_hi);
|
||||
*out = _mm_packs_epi32(s_lo, s_hi);
|
||||
}
|
||||
#define DO_PRED11(OUT) do { \
|
||||
const __m128i L_lo = _mm_unpacklo_epi32(L, T); \
|
||||
const __m128i TL_lo = _mm_unpacklo_epi32(TL, T); \
|
||||
const __m128i pb = _mm_sad_epu8(L_lo, TL_lo); /* pb = sum |L-TL|*/ \
|
||||
const __m128i mask = _mm_cmpgt_epi32(pb, pa); \
|
||||
const __m128i A = _mm_and_si128(mask, L); \
|
||||
const __m128i B = _mm_andnot_si128(mask, T); \
|
||||
const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \
|
||||
L = _mm_add_epi8(src, pred); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED11_SHIFT do { \
|
||||
/* Shift the pre-computed value for the next iteration.*/ \
|
||||
T = _mm_srli_si128(T, 4); \
|
||||
TL = _mm_srli_si128(TL, 4); \
|
||||
src = _mm_srli_si128(src, 4); \
|
||||
pa = _mm_srli_si128(pa, 4); \
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i, j;
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
__m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
|
||||
__m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
|
||||
__m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
__m128i pa;
|
||||
GetSumAbsDiff32(&T, &TL, &pa); // pa = sum |T-TL|
|
||||
for (j = 0; j < 4; ++j) {
|
||||
const __m128i L_lo = _mm_unpacklo_epi32(L, L);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi32(TL, L);
|
||||
const __m128i pb = _mm_sad_epu8(L_lo, TL_lo); // pb = sum |L-TL|
|
||||
const __m128i mask = _mm_cmpgt_epi32(pb, pa);
|
||||
const __m128i A = _mm_and_si128(mask, L);
|
||||
const __m128i B = _mm_andnot_si128(mask, T);
|
||||
const __m128i pred = _mm_or_si128(A, B); // pred = (L > T)? L : T
|
||||
L = _mm_add_epi8(src, pred);
|
||||
out[i + j] = _mm_cvtsi128_si32(L);
|
||||
// Shift the pre-computed value for the next iteration.
|
||||
T = _mm_srli_si128(T, 4);
|
||||
TL = _mm_srli_si128(TL, 4);
|
||||
src = _mm_srli_si128(src, 4);
|
||||
pa = _mm_srli_si128(pa, 4);
|
||||
{
|
||||
// We can unpack with any value on the upper 32 bits, provided it's the
|
||||
// same on both operands (so that their sum of abs diff is zero). Here we
|
||||
// use T.
|
||||
const __m128i T_lo = _mm_unpacklo_epi32(T, T);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi32(TL, T);
|
||||
const __m128i T_hi = _mm_unpackhi_epi32(T, T);
|
||||
const __m128i TL_hi = _mm_unpackhi_epi32(TL, T);
|
||||
const __m128i s_lo = _mm_sad_epu8(T_lo, TL_lo);
|
||||
const __m128i s_hi = _mm_sad_epu8(T_hi, TL_hi);
|
||||
pa = _mm_packs_epi32(s_lo, s_hi); // pa = sum |T-TL|
|
||||
}
|
||||
DO_PRED11(0);
|
||||
DO_PRED11_SHIFT;
|
||||
DO_PRED11(1);
|
||||
DO_PRED11_SHIFT;
|
||||
DO_PRED11(2);
|
||||
DO_PRED11_SHIFT;
|
||||
DO_PRED11(3);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[11](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED11
|
||||
#undef DO_PRED11_SHIFT
|
||||
|
||||
// Predictor12: ClampedAddSubtractFull.
|
||||
#define DO_PRED12(DIFF, LANE, OUT) \
|
||||
do { \
|
||||
const __m128i all = _mm_add_epi16(L, (DIFF)); \
|
||||
const __m128i alls = _mm_packus_epi16(all, all); \
|
||||
const __m128i res = _mm_add_epi8(src, alls); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(res); \
|
||||
L = _mm_unpacklo_epi8(res, zero); \
|
||||
#define DO_PRED12(DIFF, LANE, OUT) do { \
|
||||
const __m128i all = _mm_add_epi16(L, (DIFF)); \
|
||||
const __m128i alls = _mm_packus_epi16(all, all); \
|
||||
const __m128i res = _mm_add_epi8(src, alls); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(res); \
|
||||
L = _mm_unpacklo_epi8(res, zero); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED12_SHIFT(DIFF, LANE) do { \
|
||||
/* Shift the pre-computed value for the next iteration.*/ \
|
||||
if (LANE == 0) (DIFF) = _mm_srli_si128((DIFF), 8); \
|
||||
if ((LANE) == 0) (DIFF) = _mm_srli_si128((DIFF), 8); \
|
||||
src = _mm_srli_si128(src, 4); \
|
||||
} while (0)
|
||||
|
||||
@@ -377,8 +407,11 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
__m128i diff_lo = _mm_sub_epi16(T_lo, TL_lo);
|
||||
__m128i diff_hi = _mm_sub_epi16(T_hi, TL_hi);
|
||||
DO_PRED12(diff_lo, 0, 0);
|
||||
DO_PRED12_SHIFT(diff_lo, 0);
|
||||
DO_PRED12(diff_lo, 1, 1);
|
||||
DO_PRED12_SHIFT(diff_lo, 1);
|
||||
DO_PRED12(diff_hi, 0, 2);
|
||||
DO_PRED12_SHIFT(diff_hi, 0);
|
||||
DO_PRED12(diff_hi, 1, 3);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
@@ -386,6 +419,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
}
|
||||
}
|
||||
#undef DO_PRED12
|
||||
#undef DO_PRED12_SHIFT
|
||||
|
||||
// Due to averages with integers, values cannot be accumulated in parallel for
|
||||
// predictors 13.
|
||||
@@ -394,8 +428,8 @@ GENERATE_PREDICTOR_ADD(Predictor13_SSE2, PredictorAdd13_SSE2)
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
|
||||
static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
static void AddGreenToBlueAndRed_SSE2(const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
|
||||
@@ -414,19 +448,16 @@ static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src,
|
||||
int num_pixels, uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 5.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
||||
const __m128i mults_rb = _mm_set_epi16(
|
||||
CST(green_to_red_), CST(green_to_blue_),
|
||||
CST(green_to_red_), CST(green_to_blue_),
|
||||
CST(green_to_red_), CST(green_to_blue_),
|
||||
CST(green_to_red_), CST(green_to_blue_));
|
||||
const __m128i mults_b2 = _mm_set_epi16(
|
||||
CST(red_to_blue_), 0, CST(red_to_blue_), 0,
|
||||
CST(red_to_blue_), 0, CST(red_to_blue_), 0);
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
|
||||
#undef MK_CST_16
|
||||
#undef CST
|
||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||
int i;
|
||||
@@ -454,8 +485,8 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
||||
@@ -469,11 +500,11 @@ static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
|
||||
__m128i in5 = _mm_loadu_si128(in + 5);
|
||||
__m128i in6 = _mm_loadu_si128(in + 6);
|
||||
__m128i in7 = _mm_loadu_si128(in + 7);
|
||||
VP8L32bToPlanar(&in0, &in1, &in2, &in3);
|
||||
VP8L32bToPlanar(&in4, &in5, &in6, &in7);
|
||||
VP8L32bToPlanar_SSE2(&in0, &in1, &in2, &in3);
|
||||
VP8L32bToPlanar_SSE2(&in4, &in5, &in6, &in7);
|
||||
// At this points, in1/in5 contains red only, in2/in6 green only ...
|
||||
// Pack the colors in 24b RGB.
|
||||
VP8PlanarTo24b(&in1, &in5, &in2, &in6, &in3, &in7);
|
||||
VP8PlanarTo24b_SSE2(&in1, &in5, &in2, &in6, &in3, &in7);
|
||||
_mm_storeu_si128(out + 0, in1);
|
||||
_mm_storeu_si128(out + 1, in5);
|
||||
_mm_storeu_si128(out + 2, in2);
|
||||
@@ -490,27 +521,26 @@ static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
while (num_pixels >= 8) {
|
||||
const __m128i bgra0 = _mm_loadu_si128(in++); // bgra0|bgra1|bgra2|bgra3
|
||||
const __m128i bgra4 = _mm_loadu_si128(in++); // bgra4|bgra5|bgra6|bgra7
|
||||
const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4); // b0b4g0g4r0r4a0a4...
|
||||
const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4); // b2b6g2g6r2r6a2a6...
|
||||
const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h); // b0b2b4b6g0g2g4g6...
|
||||
const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h); // b1b3b5b7g1g3g5g7...
|
||||
const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h); // b0...b7 | g0...g7
|
||||
const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h); // r0...r7 | a0...a7
|
||||
const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h); // g0...g7 | a0...a7
|
||||
const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l); // r0...r7 | b0...b7
|
||||
const __m128i rg0 = _mm_unpacklo_epi8(rb0, ga0); // r0g0r1g1 ... r6g6r7g7
|
||||
const __m128i ba0 = _mm_unpackhi_epi8(rb0, ga0); // b0a0b1a1 ... b6a6b7a7
|
||||
const __m128i rgba0 = _mm_unpacklo_epi16(rg0, ba0); // rgba0|rgba1...
|
||||
const __m128i rgba4 = _mm_unpackhi_epi16(rg0, ba0); // rgba4|rgba5...
|
||||
_mm_storeu_si128(out++, rgba0);
|
||||
_mm_storeu_si128(out++, rgba4);
|
||||
const __m128i A1 = _mm_loadu_si128(in++);
|
||||
const __m128i A2 = _mm_loadu_si128(in++);
|
||||
const __m128i B1 = _mm_and_si128(A1, red_blue_mask); // R 0 B 0
|
||||
const __m128i B2 = _mm_and_si128(A2, red_blue_mask); // R 0 B 0
|
||||
const __m128i C1 = _mm_andnot_si128(red_blue_mask, A1); // 0 G 0 A
|
||||
const __m128i C2 = _mm_andnot_si128(red_blue_mask, A2); // 0 G 0 A
|
||||
const __m128i D1 = _mm_shufflelo_epi16(B1, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128i D2 = _mm_shufflelo_epi16(B2, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128i E1 = _mm_shufflehi_epi16(D1, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128i E2 = _mm_shufflehi_epi16(D2, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128i F1 = _mm_or_si128(E1, C1);
|
||||
const __m128i F2 = _mm_or_si128(E2, C2);
|
||||
_mm_storeu_si128(out++, F1);
|
||||
_mm_storeu_si128(out++, F2);
|
||||
num_pixels -= 8;
|
||||
}
|
||||
// left-overs
|
||||
@@ -519,8 +549,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@@ -541,7 +571,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
|
||||
const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f); // g0-|g1-|...|a6-|a7-
|
||||
const __m128i rgba0 = _mm_or_si128(ga2, rb1); // rg0..rg7 | ba0..ba7
|
||||
const __m128i rgba1 = _mm_srli_si128(rgba0, 8); // ba0..ba7 | 0
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0); // barg0...barg7
|
||||
#else
|
||||
const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1); // rgba0...rgba7
|
||||
@@ -555,8 +585,8 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
@@ -582,7 +612,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
|
||||
const __m128i rg1 = _mm_or_si128(rb1, g_lo2); // gr0...gr7|xx
|
||||
const __m128i b1 = _mm_srli_epi16(b0, 3);
|
||||
const __m128i gb1 = _mm_or_si128(b1, g_hi2); // bg0...bg7|xx
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1); // rggb0...rggb7
|
||||
#else
|
||||
const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1); // bgrb0...bgrb7
|
||||
@@ -596,8 +626,8 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
|
||||
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@@ -660,14 +690,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
|
||||
VP8LPredictorsAdd[12] = PredictorAdd12_SSE2;
|
||||
VP8LPredictorsAdd[13] = PredictorAdd13_SSE2;
|
||||
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
|
||||
VP8LTransformColorInverse = TransformColorInverse;
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_SSE2;
|
||||
VP8LTransformColorInverse = TransformColorInverse_SSE2;
|
||||
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
|
||||
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
|
||||
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE2;
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_SSE2;
|
||||
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_SSE2;
|
||||
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_SSE2;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
@@ -27,8 +27,10 @@ SOURCES += [
|
||||
'upsampling.c',
|
||||
'upsampling_neon.c',
|
||||
'upsampling_sse2.c',
|
||||
'upsampling_sse41.c',
|
||||
'yuv.c',
|
||||
'yuv_sse2.c',
|
||||
'yuv_sse41.c',
|
||||
]
|
||||
|
||||
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
|
||||
@@ -45,7 +47,9 @@ elif CONFIG['INTEL_ARCHITECTURE']:
|
||||
SOURCES['lossless_sse2.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
SOURCES['rescaler_sse2.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
SOURCES['upsampling_sse2.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
SOURCES['upsampling_sse41.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
SOURCES['yuv_sse2.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
SOURCES['yuv_sse41.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
||||
FINAL_LIBRARY = 'gkmedias'
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef CLANG_BUILD
|
||||
#define ALPHAVAL (-1)
|
||||
#define ADDVI_H(a, b) __msa_addvi_h((v8i16)a, b)
|
||||
#define ADDVI_W(a, b) __msa_addvi_w((v4i32)a, b)
|
||||
#define SRAI_B(a, b) __msa_srai_b((v16i8)a, b)
|
||||
@@ -32,6 +33,7 @@
|
||||
#define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)
|
||||
#define ORI_B(a, b) __msa_ori_b((v16u8)a, b)
|
||||
#else
|
||||
#define ALPHAVAL (0xff)
|
||||
#define ADDVI_H(a, b) (a + b)
|
||||
#define ADDVI_W(a, b) (a + b)
|
||||
#define SRAI_B(a, b) (a >> b)
|
||||
@@ -1387,4 +1389,4 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
|
||||
} while (0)
|
||||
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif /* WEBP_DSP_MSA_MACRO_H_ */
|
||||
#endif // WEBP_DSP_MSA_MACRO_H_
|
||||
|
||||
@@ -14,11 +14,12 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
// Right now, some intrinsics functions seem slower, so we disable them
|
||||
// everywhere except aarch64 where the inline assembly is incompatible.
|
||||
#if defined(__aarch64__)
|
||||
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
|
||||
// incompatible.
|
||||
#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
|
||||
#define WEBP_USE_INTRINSICS // use intrinsics when possible
|
||||
#endif
|
||||
|
||||
@@ -43,11 +44,11 @@
|
||||
// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
|
||||
// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
|
||||
// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
|
||||
#if !(LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WORK_AROUND_GCC
|
||||
#endif
|
||||
|
||||
static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) {
|
||||
static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
|
||||
uint64x2x2_t row01, row23;
|
||||
|
||||
row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
// Copyright 2018 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#ifndef WEBP_DSP_QUANT_H_
|
||||
#define WEBP_DSP_QUANT_H_
|
||||
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../webp/types.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) && \
|
||||
!defined(WEBP_HAVE_NEON_RTCD)
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define IsFlat IsFlat_NEON
|
||||
|
||||
static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
|
||||
const uint64x2_t b = vpaddlq_u32(a);
|
||||
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
|
||||
vreinterpret_u32_u64(vget_high_u64(b)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
|
||||
int thresh) {
|
||||
const int16x8_t tst_ones = vdupq_n_s16(-1);
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
// Set DC to zero.
|
||||
const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
|
||||
const int16x8_t a_1 = vld1q_s16(levels + 8);
|
||||
|
||||
const uint16x8_t b_0 = vshrq_n_u16(vtstq_s16(a_0, tst_ones), 15);
|
||||
const uint16x8_t b_1 = vshrq_n_u16(vtstq_s16(a_1, tst_ones), 15);
|
||||
|
||||
sum = vpadalq_u16(sum, b_0);
|
||||
sum = vpadalq_u16(sum, b_1);
|
||||
|
||||
levels += 16;
|
||||
}
|
||||
return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define IsFlat IsFlat_C
|
||||
|
||||
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
|
||||
int thresh) {
|
||||
int score = 0;
|
||||
while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
|
||||
int i;
|
||||
for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
|
||||
score += (levels[i] != 0);
|
||||
if (score > thresh) return 0;
|
||||
}
|
||||
levels += 16;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) &&
|
||||
// !defined(WEBP_HAVE_NEON_RTCD)
|
||||
|
||||
#endif // WEBP_DSP_QUANT_H_
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../utils/rescaler_utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -21,11 +21,13 @@
|
||||
|
||||
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
|
||||
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
|
||||
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@@ -56,7 +58,8 @@ void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@@ -92,7 +95,7 @@ void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Row export
|
||||
|
||||
void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
|
||||
void WebPRescalerExportRowExpand_C(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -123,7 +126,7 @@ void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
|
||||
void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -136,7 +139,7 @@ void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
|
||||
if (yscale) {
|
||||
for (x_out = 0; x_out < x_out_max; ++x_out) {
|
||||
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
|
||||
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
|
||||
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale);
|
||||
assert(v >= 0 && v <= 255);
|
||||
dst[x_out] = v;
|
||||
irow[x_out] = frac; // new fractional start
|
||||
@@ -151,6 +154,7 @@ void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
#undef MULT_FIX_FLOOR
|
||||
#undef MULT_FIX
|
||||
#undef ROUNDER
|
||||
|
||||
@@ -202,16 +206,15 @@ extern void WebPRescalerDspInitMIPSdspR2(void);
|
||||
extern void WebPRescalerDspInitMSA(void);
|
||||
extern void WebPRescalerDspInitNEON(void);
|
||||
|
||||
static volatile VP8CPUInfo rescaler_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&rescaler_last_cpuinfo_used;
|
||||
WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C;
|
||||
WebPRescalerExportRowShrink = WebPRescalerExportRowShrink_C;
|
||||
#endif
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
|
||||
if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPRescalerImportRowExpand = WebPRescalerImportRowExpandC;
|
||||
WebPRescalerImportRowShrink = WebPRescalerImportRowShrinkC;
|
||||
WebPRescalerExportRowExpand = WebPRescalerExportRowExpandC;
|
||||
WebPRescalerExportRowShrink = WebPRescalerExportRowShrinkC;
|
||||
WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C;
|
||||
WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@@ -219,11 +222,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
|
||||
WebPRescalerDspInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
WebPRescalerDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
WebPRescalerDspInitMIPS32();
|
||||
@@ -240,5 +238,17 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
rescaler_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPRescalerDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPRescalerExportRowExpand != NULL);
|
||||
assert(WebPRescalerExportRowShrink != NULL);
|
||||
assert(WebPRescalerImportRowExpand != NULL);
|
||||
assert(WebPRescalerImportRowShrink != NULL);
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
}
|
||||
|
||||
@@ -11,17 +11,18 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
#include "./neon.h"
|
||||
#include "../dsp/neon.h"
|
||||
#include "../utils/rescaler_utils.h"
|
||||
|
||||
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
|
||||
#define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
|
||||
#define MULT_FIX_FLOOR_C(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
|
||||
|
||||
#define LOAD_32x4(SRC, DST) const uint32x4_t DST = vld1q_u32((SRC))
|
||||
#define LOAD_32x8(SRC, DST0, DST1) \
|
||||
@@ -35,15 +36,18 @@
|
||||
|
||||
#if (WEBP_RESCALER_RFIX == 32)
|
||||
#define MAKE_HALF_CST(C) vdupq_n_s32((int32_t)((C) >> 1))
|
||||
#define MULT_FIX(A, B) /* note: B is actualy scale>>1. See MAKE_HALF_CST */ \
|
||||
// note: B is actualy scale>>1. See MAKE_HALF_CST
|
||||
#define MULT_FIX(A, B) \
|
||||
vreinterpretq_u32_s32(vqrdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
|
||||
#define MULT_FIX_FLOOR(A, B) \
|
||||
vreinterpretq_u32_s32(vqdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
|
||||
#else
|
||||
#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
|
||||
#endif
|
||||
|
||||
static uint32x4_t Interpolate(const rescaler_t* const frow,
|
||||
const rescaler_t* const irow,
|
||||
uint32_t A, uint32_t B) {
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* const frow,
|
||||
const rescaler_t* const irow,
|
||||
uint32_t A, uint32_t B) {
|
||||
LOAD_32x4(frow, A0);
|
||||
LOAD_32x4(irow, B0);
|
||||
const uint64x2_t C0 = vmull_n_u32(vget_low_u32(A0), A);
|
||||
@@ -56,7 +60,7 @@ static uint32x4_t Interpolate(const rescaler_t* const frow,
|
||||
return E;
|
||||
}
|
||||
|
||||
static void RescalerExportRowExpand(WebPRescaler* const wrk) {
|
||||
static void RescalerExportRowExpand_NEON(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -91,9 +95,9 @@ static void RescalerExportRowExpand(WebPRescaler* const wrk) {
|
||||
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
|
||||
for (x_out = 0; x_out < max_span; x_out += 8) {
|
||||
const uint32x4_t C0 =
|
||||
Interpolate(frow + x_out + 0, irow + x_out + 0, A, B);
|
||||
Interpolate_NEON(frow + x_out + 0, irow + x_out + 0, A, B);
|
||||
const uint32x4_t C1 =
|
||||
Interpolate(frow + x_out + 4, irow + x_out + 4, A, B);
|
||||
Interpolate_NEON(frow + x_out + 4, irow + x_out + 4, A, B);
|
||||
const uint32x4_t D0 = MULT_FIX(C0, fy_scale_half);
|
||||
const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half);
|
||||
const uint16x4_t E0 = vmovn_u32(D0);
|
||||
@@ -112,7 +116,7 @@ static void RescalerExportRowExpand(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
static void RescalerExportRowShrink(WebPRescaler* const wrk) {
|
||||
static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -135,8 +139,8 @@ static void RescalerExportRowShrink(WebPRescaler* const wrk) {
|
||||
const uint32x4_t A1 = MULT_FIX(in1, yscale_half);
|
||||
const uint32x4_t B0 = vqsubq_u32(in2, A0);
|
||||
const uint32x4_t B1 = vqsubq_u32(in3, A1);
|
||||
const uint32x4_t C0 = MULT_FIX(B0, fxy_scale_half);
|
||||
const uint32x4_t C1 = MULT_FIX(B1, fxy_scale_half);
|
||||
const uint32x4_t C0 = MULT_FIX_FLOOR(B0, fxy_scale_half);
|
||||
const uint32x4_t C1 = MULT_FIX_FLOOR(B1, fxy_scale_half);
|
||||
const uint16x4_t D0 = vmovn_u32(C0);
|
||||
const uint16x4_t D1 = vmovn_u32(C1);
|
||||
const uint8x8_t E = vmovn_u16(vcombine_u16(D0, D1));
|
||||
@@ -145,7 +149,7 @@ static void RescalerExportRowShrink(WebPRescaler* const wrk) {
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const uint32_t frac = (uint32_t)MULT_FIX_C(frow[x_out], yscale);
|
||||
const int v = (int)MULT_FIX_C(irow[x_out] - frac, wrk->fxy_scale);
|
||||
const int v = (int)MULT_FIX_FLOOR_C(irow[x_out] - frac, fxy_scale);
|
||||
assert(v >= 0 && v <= 255);
|
||||
dst[x_out] = v;
|
||||
irow[x_out] = frac; // new fractional start
|
||||
@@ -170,13 +174,19 @@ static void RescalerExportRowShrink(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
#undef MULT_FIX_FLOOR_C
|
||||
#undef MULT_FIX_C
|
||||
#undef MULT_FIX_FLOOR
|
||||
#undef MULT_FIX
|
||||
#undef ROUNDER
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPRescalerDspInitNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitNEON(void) {
|
||||
WebPRescalerExportRowExpand = RescalerExportRowExpand;
|
||||
WebPRescalerExportRowShrink = RescalerExportRowShrink;
|
||||
WebPRescalerExportRowExpand = RescalerExportRowExpand_NEON;
|
||||
WebPRescalerExportRowShrink = RescalerExportRowShrink_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
@@ -11,9 +11,9 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_REDUCE_SIZE)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <assert.h>
|
||||
@@ -25,9 +25,10 @@
|
||||
|
||||
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
|
||||
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
|
||||
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
|
||||
|
||||
// input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
|
||||
static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
|
||||
static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
||||
const __m128i B = _mm_unpacklo_epi8(A, zero); // A0B0C0D0E0F0G0H0
|
||||
@@ -36,28 +37,30 @@ static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
|
||||
}
|
||||
|
||||
// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
|
||||
static void LoadHeightPixels(const uint8_t* const src, __m128i* out) {
|
||||
static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
||||
*out = _mm_unpacklo_epi8(A, zero);
|
||||
}
|
||||
|
||||
static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
rescaler_t* frow = wrk->frow;
|
||||
const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
int accum = x_add;
|
||||
__m128i cur_pixels;
|
||||
|
||||
// SSE2 implementation only works with 16b signed arithmetic at max.
|
||||
if (wrk->src_width < 8 || accum >= (1 << 15)) {
|
||||
WebPRescalerImportRowExpand_C(wrk, src);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!WebPRescalerInputDone(wrk));
|
||||
assert(wrk->x_expand);
|
||||
if (wrk->num_channels == 4) {
|
||||
if (wrk->src_width < 2) {
|
||||
WebPRescalerImportRowExpandC(wrk, src);
|
||||
return;
|
||||
}
|
||||
LoadTwoPixels(src, &cur_pixels);
|
||||
LoadTwoPixels_SSE2(src, &cur_pixels);
|
||||
src += 4;
|
||||
while (1) {
|
||||
const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum);
|
||||
@@ -67,7 +70,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
|
||||
if (frow >= frow_end) break;
|
||||
accum -= wrk->x_sub;
|
||||
if (accum < 0) {
|
||||
LoadTwoPixels(src, &cur_pixels);
|
||||
LoadTwoPixels_SSE2(src, &cur_pixels);
|
||||
src += 4;
|
||||
accum += x_add;
|
||||
}
|
||||
@@ -75,11 +78,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
|
||||
} else {
|
||||
int left;
|
||||
const uint8_t* const src_limit = src + wrk->src_width - 8;
|
||||
if (wrk->src_width < 8) {
|
||||
WebPRescalerImportRowExpandC(wrk, src);
|
||||
return;
|
||||
}
|
||||
LoadHeightPixels(src, &cur_pixels);
|
||||
LoadEightPixels_SSE2(src, &cur_pixels);
|
||||
src += 7;
|
||||
left = 7;
|
||||
while (1) {
|
||||
@@ -94,7 +93,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
|
||||
if (--left) {
|
||||
cur_pixels = _mm_srli_si128(cur_pixels, 2);
|
||||
} else if (src <= src_limit) {
|
||||
LoadHeightPixels(src, &cur_pixels);
|
||||
LoadEightPixels_SSE2(src, &cur_pixels);
|
||||
src += 7;
|
||||
left = 7;
|
||||
} else { // tail
|
||||
@@ -110,8 +109,8 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
|
||||
assert(accum == 0);
|
||||
}
|
||||
|
||||
static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_sub = wrk->x_sub;
|
||||
int accum = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@@ -123,7 +122,7 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
|
||||
const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width;
|
||||
|
||||
if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) {
|
||||
WebPRescalerImportRowShrinkC(wrk, src);
|
||||
WebPRescalerImportRowShrink_C(wrk, src);
|
||||
return;
|
||||
}
|
||||
assert(!WebPRescalerInputDone(wrk));
|
||||
@@ -169,12 +168,12 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
|
||||
// Row export
|
||||
|
||||
// load *src as epi64, multiply by mult and store result in [out0 ... out3]
|
||||
static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
__m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
__m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
|
||||
const __m128i A2 = _mm_srli_epi64(A0, 32);
|
||||
@@ -192,12 +191,12 @@ static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ProcessRow(const __m128i* const A0,
|
||||
const __m128i* const A1,
|
||||
const __m128i* const A2,
|
||||
const __m128i* const A3,
|
||||
const __m128i* const mult,
|
||||
uint8_t* const dst) {
|
||||
static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
|
||||
const __m128i* const A1,
|
||||
const __m128i* const A2,
|
||||
const __m128i* const A3,
|
||||
const __m128i* const mult,
|
||||
uint8_t* const dst) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
|
||||
const __m128i B0 = _mm_mul_epu32(*A0, *mult);
|
||||
@@ -210,7 +209,7 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0,
|
||||
const __m128i C3 = _mm_add_epi64(B3, rounder);
|
||||
const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX);
|
||||
const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
|
||||
#if (WEBP_RESCALER_FIX < 32)
|
||||
#if (WEBP_RESCALER_RFIX < 32)
|
||||
const __m128i D2 =
|
||||
_mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask);
|
||||
const __m128i D3 =
|
||||
@@ -226,7 +225,36 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0,
|
||||
_mm_storel_epi64((__m128i*)dst, G);
|
||||
}
|
||||
|
||||
static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ProcessRow_Floor_SSE2(const __m128i* const A0,
|
||||
const __m128i* const A1,
|
||||
const __m128i* const A2,
|
||||
const __m128i* const A3,
|
||||
const __m128i* const mult,
|
||||
uint8_t* const dst) {
|
||||
const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
|
||||
const __m128i B0 = _mm_mul_epu32(*A0, *mult);
|
||||
const __m128i B1 = _mm_mul_epu32(*A1, *mult);
|
||||
const __m128i B2 = _mm_mul_epu32(*A2, *mult);
|
||||
const __m128i B3 = _mm_mul_epu32(*A3, *mult);
|
||||
const __m128i D0 = _mm_srli_epi64(B0, WEBP_RESCALER_RFIX);
|
||||
const __m128i D1 = _mm_srli_epi64(B1, WEBP_RESCALER_RFIX);
|
||||
#if (WEBP_RESCALER_RFIX < 32)
|
||||
const __m128i D2 =
|
||||
_mm_and_si128(_mm_slli_epi64(B2, 32 - WEBP_RESCALER_RFIX), mask);
|
||||
const __m128i D3 =
|
||||
_mm_and_si128(_mm_slli_epi64(B3, 32 - WEBP_RESCALER_RFIX), mask);
|
||||
#else
|
||||
const __m128i D2 = _mm_and_si128(B2, mask);
|
||||
const __m128i D3 = _mm_and_si128(B3, mask);
|
||||
#endif
|
||||
const __m128i E0 = _mm_or_si128(D0, D2);
|
||||
const __m128i E1 = _mm_or_si128(D1, D3);
|
||||
const __m128i F = _mm_packs_epi32(E0, E1);
|
||||
const __m128i G = _mm_packus_epi16(F, F);
|
||||
_mm_storel_epi64((__m128i*)dst, G);
|
||||
}
|
||||
|
||||
static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -240,8 +268,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
|
||||
if (wrk->y_accum == 0) {
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3;
|
||||
LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const uint32_t J = frow[x_out];
|
||||
@@ -257,8 +285,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3, B0, B1, B2, B3;
|
||||
LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, &mA, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, &mB, &B0, &B1, &B2, &B3);
|
||||
{
|
||||
const __m128i C0 = _mm_add_epi64(A0, B0);
|
||||
const __m128i C1 = _mm_add_epi64(A1, B1);
|
||||
@@ -272,7 +300,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
|
||||
const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);
|
||||
const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);
|
||||
const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);
|
||||
ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out);
|
||||
ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult, dst + x_out);
|
||||
}
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
@@ -286,7 +314,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
|
||||
int x_out;
|
||||
uint8_t* const dst = wrk->dst;
|
||||
rescaler_t* const irow = wrk->irow;
|
||||
@@ -303,8 +331,8 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3, B0, B1, B2, B3;
|
||||
LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
|
||||
{
|
||||
const __m128i C0 = _mm_add_epi64(B0, rounder);
|
||||
const __m128i C1 = _mm_add_epi64(B1, rounder);
|
||||
@@ -324,12 +352,12 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
const __m128i G1 = _mm_or_si128(D1, F3);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
|
||||
ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
|
||||
ProcessRow_Floor_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
|
||||
}
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const uint32_t frac = (int)MULT_FIX(frow[x_out], yscale);
|
||||
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
|
||||
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale);
|
||||
assert(v >= 0 && v <= 255);
|
||||
dst[x_out] = v;
|
||||
irow[x_out] = frac; // new fractional start
|
||||
@@ -340,10 +368,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3;
|
||||
LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 0), zero);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 4), zero);
|
||||
ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const int v = (int)MULT_FIX(irow[x_out], scale);
|
||||
@@ -354,6 +382,7 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
}
|
||||
}
|
||||
|
||||
#undef MULT_FIX_FLOOR
|
||||
#undef MULT_FIX
|
||||
#undef ROUNDER
|
||||
|
||||
@@ -362,10 +391,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
|
||||
extern void WebPRescalerDspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitSSE2(void) {
|
||||
WebPRescalerImportRowExpand = RescalerImportRowExpandSSE2;
|
||||
WebPRescalerImportRowShrink = RescalerImportRowShrinkSSE2;
|
||||
WebPRescalerExportRowExpand = RescalerExportRowExpandSSE2;
|
||||
WebPRescalerExportRowShrink = RescalerExportRowShrinkSSE2;
|
||||
WebPRescalerImportRowExpand = RescalerImportRowExpand_SSE2;
|
||||
WebPRescalerImportRowShrink = RescalerImportRowShrink_SSE2;
|
||||
WebPRescalerExportRowExpand = RescalerExportRowExpand_SSE2;
|
||||
WebPRescalerExportRowShrink = RescalerExportRowShrink_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
+124
-63
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "./yuv.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
@@ -63,17 +63,17 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \
|
||||
const uint32_t uv1 = (diag_03 + t_uv) >> 1; \
|
||||
FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
|
||||
top_dst + (2 * x - 1) * XSTEP); \
|
||||
top_dst + (2 * x - 1) * (XSTEP)); \
|
||||
FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \
|
||||
top_dst + (2 * x - 0) * XSTEP); \
|
||||
top_dst + (2 * x - 0) * (XSTEP)); \
|
||||
} \
|
||||
if (bottom_y != NULL) { \
|
||||
const uint32_t uv0 = (diag_03 + l_uv) >> 1; \
|
||||
const uint32_t uv1 = (diag_12 + uv) >> 1; \
|
||||
FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
|
||||
bottom_dst + (2 * x - 1) * XSTEP); \
|
||||
bottom_dst + (2 * x - 1) * (XSTEP)); \
|
||||
FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \
|
||||
bottom_dst + (2 * x + 0) * XSTEP); \
|
||||
bottom_dst + (2 * x + 0) * (XSTEP)); \
|
||||
} \
|
||||
tl_uv = t_uv; \
|
||||
l_uv = uv; \
|
||||
@@ -82,24 +82,50 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
{ \
|
||||
const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
|
||||
FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
|
||||
top_dst + (len - 1) * XSTEP); \
|
||||
top_dst + (len - 1) * (XSTEP)); \
|
||||
} \
|
||||
if (bottom_y != NULL) { \
|
||||
const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \
|
||||
FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
|
||||
bottom_dst + (len - 1) * XSTEP); \
|
||||
bottom_dst + (len - 1) * (XSTEP)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// All variants implemented.
|
||||
UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
|
||||
UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
|
||||
UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
|
||||
UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
|
||||
UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
|
||||
UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
|
||||
UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
UPSAMPLE_FUNC(UpsampleRgbaLinePair_C, VP8YuvToRgba, 4)
|
||||
UPSAMPLE_FUNC(UpsampleBgraLinePair_C, VP8YuvToBgra, 4)
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
UPSAMPLE_FUNC(UpsampleArgbLinePair_C, VP8YuvToArgb, 4)
|
||||
UPSAMPLE_FUNC(UpsampleRgbLinePair_C, VP8YuvToRgb, 3)
|
||||
UPSAMPLE_FUNC(UpsampleBgrLinePair_C, VP8YuvToBgr, 3)
|
||||
UPSAMPLE_FUNC(UpsampleRgba4444LinePair_C, VP8YuvToRgba4444, 2)
|
||||
UPSAMPLE_FUNC(UpsampleRgb565LinePair_C, VP8YuvToRgb565, 2)
|
||||
#else
|
||||
static void EmptyUpsampleFunc(const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
const uint8_t* top_u, const uint8_t* top_v,
|
||||
const uint8_t* cur_u, const uint8_t* cur_v,
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) {
|
||||
(void)top_y;
|
||||
(void)bottom_y;
|
||||
(void)top_u;
|
||||
(void)top_v;
|
||||
(void)cur_u;
|
||||
(void)cur_v;
|
||||
(void)top_dst;
|
||||
(void)bottom_dst;
|
||||
(void)len;
|
||||
assert(0); // COLORSPACE SUPPORT NOT COMPILED
|
||||
}
|
||||
#define UpsampleArgbLinePair_C EmptyUpsampleFunc
|
||||
#define UpsampleRgbLinePair_C EmptyUpsampleFunc
|
||||
#define UpsampleBgrLinePair_C EmptyUpsampleFunc
|
||||
#define UpsampleRgba4444LinePair_C EmptyUpsampleFunc
|
||||
#define UpsampleRgb565LinePair_C EmptyUpsampleFunc
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
#endif
|
||||
|
||||
#undef LOAD_UV
|
||||
#undef UPSAMPLE_FUNC
|
||||
@@ -141,7 +167,6 @@ DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
|
||||
|
||||
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
|
||||
WebPInitUpsamplers();
|
||||
VP8YUVInit();
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
|
||||
#else
|
||||
@@ -158,16 +183,33 @@ extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * (XSTEP)]); \
|
||||
}
|
||||
|
||||
YUV444_FUNC(WebPYuv444ToRgbC, VP8YuvToRgb, 3)
|
||||
YUV444_FUNC(WebPYuv444ToBgrC, VP8YuvToBgr, 3)
|
||||
YUV444_FUNC(WebPYuv444ToRgbaC, VP8YuvToRgba, 4)
|
||||
YUV444_FUNC(WebPYuv444ToBgraC, VP8YuvToBgra, 4)
|
||||
YUV444_FUNC(WebPYuv444ToArgbC, VP8YuvToArgb, 4)
|
||||
YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
|
||||
YUV444_FUNC(WebPYuv444ToRgb565C, VP8YuvToRgb565, 2)
|
||||
YUV444_FUNC(WebPYuv444ToRgba_C, VP8YuvToRgba, 4)
|
||||
YUV444_FUNC(WebPYuv444ToBgra_C, VP8YuvToBgra, 4)
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
YUV444_FUNC(WebPYuv444ToRgb_C, VP8YuvToRgb, 3)
|
||||
YUV444_FUNC(WebPYuv444ToBgr_C, VP8YuvToBgr, 3)
|
||||
YUV444_FUNC(WebPYuv444ToArgb_C, VP8YuvToArgb, 4)
|
||||
YUV444_FUNC(WebPYuv444ToRgba4444_C, VP8YuvToRgba4444, 2)
|
||||
YUV444_FUNC(WebPYuv444ToRgb565_C, VP8YuvToRgb565, 2)
|
||||
#else
|
||||
static void EmptyYuv444Func(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
(void)y;
|
||||
(void)u;
|
||||
(void)v;
|
||||
(void)dst;
|
||||
(void)len;
|
||||
}
|
||||
#define WebPYuv444ToRgb_C EmptyYuv444Func
|
||||
#define WebPYuv444ToBgr_C EmptyYuv444Func
|
||||
#define WebPYuv444ToArgb_C EmptyYuv444Func
|
||||
#define WebPYuv444ToRgba4444_C EmptyYuv444Func
|
||||
#define WebPYuv444ToRgb565_C EmptyYuv444Func
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
#undef YUV444_FUNC
|
||||
|
||||
@@ -175,24 +217,20 @@ WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
|
||||
|
||||
extern void WebPInitYUV444ConvertersMIPSdspR2(void);
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
|
||||
static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
|
||||
(VP8CPUInfo)&upsampling_last_cpuinfo_used1;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
|
||||
if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return;
|
||||
|
||||
WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgbC;
|
||||
WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgbaC;
|
||||
WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgrC;
|
||||
WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgraC;
|
||||
WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgbC;
|
||||
WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
|
||||
WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565C;
|
||||
WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgbaC;
|
||||
WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgraC;
|
||||
WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgbC;
|
||||
WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
|
||||
WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) {
|
||||
WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgba_C;
|
||||
WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgra_C;
|
||||
WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgb_C;
|
||||
WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgr_C;
|
||||
WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgb_C;
|
||||
WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444_C;
|
||||
WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565_C;
|
||||
WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgba_C;
|
||||
WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgra_C;
|
||||
WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgb_C;
|
||||
WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C;
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@@ -200,41 +238,43 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
|
||||
WebPInitYUV444ConvertersSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitYUV444ConvertersSSE41();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
WebPInitYUV444ConvertersMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
upsampling_last_cpuinfo_used1 = VP8GetCPUInfo;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main calls
|
||||
|
||||
extern void WebPInitUpsamplersSSE2(void);
|
||||
extern void WebPInitUpsamplersSSE41(void);
|
||||
extern void WebPInitUpsamplersNEON(void);
|
||||
extern void WebPInitUpsamplersMIPSdspR2(void);
|
||||
extern void WebPInitUpsamplersMSA(void);
|
||||
|
||||
static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 =
|
||||
(VP8CPUInfo)&upsampling_last_cpuinfo_used2;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
|
||||
if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_C;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_C;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_C;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_C;
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_C;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_C;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_C;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_C;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_C;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_C;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_C;
|
||||
#endif
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@@ -243,9 +283,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
|
||||
WebPInitUpsamplersSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
WebPInitUpsamplersNEON();
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitUpsamplersSSE41();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
@@ -259,8 +299,29 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitUpsamplersNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPUpsamplers[MODE_RGBA] != NULL);
|
||||
assert(WebPUpsamplers[MODE_BGRA] != NULL);
|
||||
assert(WebPUpsamplers[MODE_rgbA] != NULL);
|
||||
assert(WebPUpsamplers[MODE_bgrA] != NULL);
|
||||
#if !defined(WEBP_REDUCE_CSP) || !WEBP_NEON_OMIT_C_CODE
|
||||
assert(WebPUpsamplers[MODE_RGB] != NULL);
|
||||
assert(WebPUpsamplers[MODE_BGR] != NULL);
|
||||
assert(WebPUpsamplers[MODE_ARGB] != NULL);
|
||||
assert(WebPUpsamplers[MODE_RGBA_4444] != NULL);
|
||||
assert(WebPUpsamplers[MODE_RGB_565] != NULL);
|
||||
assert(WebPUpsamplers[MODE_Argb] != NULL);
|
||||
assert(WebPUpsamplers[MODE_rgbA_4444] != NULL);
|
||||
#endif
|
||||
|
||||
#endif // FANCY_UPSAMPLING
|
||||
upsampling_last_cpuinfo_used2 = VP8GetCPUInfo;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -12,15 +12,15 @@
|
||||
// Author: mans@mansr.com (Mans Rullgard)
|
||||
// Based on SSE code by: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <assert.h>
|
||||
#include <arm_neon.h>
|
||||
#include <string.h>
|
||||
#include "./neon.h"
|
||||
#include "./yuv.h"
|
||||
#include "../dsp/neon.h"
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
@@ -58,8 +58,8 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
|
||||
uint8_t *out) {
|
||||
static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2,
|
||||
uint8_t *out) {
|
||||
UPSAMPLE_16PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
|
||||
/* replicate last byte */ \
|
||||
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \
|
||||
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \
|
||||
Upsample16Pixels(r1, r2, out); \
|
||||
Upsample16Pixels_NEON(r1, r2, out); \
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
@@ -243,13 +243,15 @@ static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
|
||||
}
|
||||
|
||||
// NEON variants of the fancy upsampler.
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair, Rgb, 3)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair, Bgr, 3)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair, Rgba, 4)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair, Bgra, 4)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleArgbLinePair, Argb, 4)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, Rgba4444, 2)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgb565LinePair, Rgb565, 2)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair_NEON, Rgba, 4)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair_NEON, Bgra, 4)
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair_NEON, Rgb, 3)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair_NEON, Bgr, 3)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleArgbLinePair_NEON, Argb, 4)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgba4444LinePair_NEON, Rgba4444, 2)
|
||||
NEON_UPSAMPLE_FUNC(UpsampleRgb565LinePair_NEON, Rgb565, 2)
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
@@ -259,17 +261,19 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
extern void WebPInitUpsamplersNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersNEON(void) {
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_NEON;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_NEON;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_NEON;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_NEON;
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_NEON;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_NEON;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_NEON;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_NEON;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_NEON;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_NEON;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_NEON;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
@@ -11,14 +11,14 @@
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
#include <string.h>
|
||||
#include "./yuv.h"
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
@@ -83,13 +83,13 @@
|
||||
GET_M(ad, s, diag2); /* diag2 = (3a + b + c + 3d) / 8 */ \
|
||||
\
|
||||
/* pack the alternate pixels */ \
|
||||
PACK_AND_STORE(a, b, diag1, diag2, out + 0); /* store top */ \
|
||||
PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32); /* store bottom */ \
|
||||
PACK_AND_STORE(a, b, diag1, diag2, (out) + 0); /* store top */ \
|
||||
PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32); /* store bottom */ \
|
||||
}
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@@ -101,30 +101,15 @@ static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
|
||||
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels)); \
|
||||
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels)); \
|
||||
/* using the shared function instead of the macro saves ~3k code size */ \
|
||||
Upsample32Pixels(r1, r2, out); \
|
||||
}
|
||||
|
||||
#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, \
|
||||
top_dst, bottom_dst, cur_x, num_pixels) { \
|
||||
int n; \
|
||||
for (n = 0; n < (num_pixels); ++n) { \
|
||||
FUNC(top_y[(cur_x) + n], r_u[n], r_v[n], \
|
||||
top_dst + ((cur_x) + n) * XSTEP); \
|
||||
} \
|
||||
if (bottom_y != NULL) { \
|
||||
for (n = 0; n < (num_pixels); ++n) { \
|
||||
FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n], \
|
||||
bottom_dst + ((cur_x) + n) * XSTEP); \
|
||||
} \
|
||||
} \
|
||||
Upsample32Pixels_SSE2(r1, r2, out); \
|
||||
}
|
||||
|
||||
#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \
|
||||
top_dst, bottom_dst, cur_x) do { \
|
||||
FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP); \
|
||||
if (bottom_y != NULL) { \
|
||||
FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64, \
|
||||
bottom_dst + (cur_x) * XSTEP); \
|
||||
FUNC##32_SSE2((top_y) + (cur_x), r_u, r_v, (top_dst) + (cur_x) * (XSTEP)); \
|
||||
if ((bottom_y) != NULL) { \
|
||||
FUNC##32_SSE2((bottom_y) + (cur_x), r_u + 64, r_v + 64, \
|
||||
(bottom_dst) + (cur_x) * (XSTEP)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
@@ -135,7 +120,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[4 * 32 + 15]; \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
uint8_t* const r_v = r_u + 32; \
|
||||
\
|
||||
@@ -160,22 +145,36 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
} \
|
||||
if (len > 1) { \
|
||||
const int left_over = ((len + 1) >> 1) - (pos >> 1); \
|
||||
uint8_t* const tmp_top_dst = r_u + 4 * 32; \
|
||||
uint8_t* const tmp_bottom_dst = tmp_top_dst + 4 * 32; \
|
||||
uint8_t* const tmp_top = tmp_bottom_dst + 4 * 32; \
|
||||
uint8_t* const tmp_bottom = (bottom_y == NULL) ? NULL : tmp_top + 32; \
|
||||
assert(left_over > 0); \
|
||||
UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u); \
|
||||
UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v); \
|
||||
CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, \
|
||||
pos, len - pos); \
|
||||
memcpy(tmp_top, top_y + pos, len - pos); \
|
||||
if (bottom_y != NULL) memcpy(tmp_bottom, bottom_y + pos, len - pos); \
|
||||
CONVERT2RGB_32(FUNC, XSTEP, tmp_top, tmp_bottom, tmp_top_dst, \
|
||||
tmp_bottom_dst, 0); \
|
||||
memcpy(top_dst + pos * (XSTEP), tmp_top_dst, (len - pos) * (XSTEP)); \
|
||||
if (bottom_y != NULL) { \
|
||||
memcpy(bottom_dst + pos * (XSTEP), tmp_bottom_dst, \
|
||||
(len - pos) * (XSTEP)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// SSE2 variants of the fancy upsampler.
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair_SSE2, VP8YuvToRgba, 4)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair_SSE2, VP8YuvToBgra, 4)
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE2, VP8YuvToRgb, 3)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE2, VP8YuvToBgr, 3)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleArgbLinePair_SSE2, VP8YuvToArgb, 4)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgba4444LinePair_SSE2, VP8YuvToRgba4444, 2)
|
||||
SSE2_UPSAMPLE_FUNC(UpsampleRgb565LinePair_SSE2, VP8YuvToRgb565, 2)
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
#undef GET_M
|
||||
#undef PACK_AND_STORE
|
||||
@@ -193,17 +192,19 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
extern void WebPInitUpsamplersSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
|
||||
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_SSE2;
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_SSE2;
|
||||
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_SSE2;
|
||||
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_SSE2;
|
||||
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_SSE2;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
|
||||
#endif // FANCY_UPSAMPLING
|
||||
@@ -213,29 +214,46 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
|
||||
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP) \
|
||||
extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u, \
|
||||
const uint8_t* v, uint8_t* dst, int len); \
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);\
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
CALL(y + i, u + i, v + i, dst + i * (XSTEP)); \
|
||||
} \
|
||||
if (i < len) { /* C-fallback */ \
|
||||
WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i); \
|
||||
CALL_C(y + i, u + i, v + i, dst + i * (XSTEP), len - i); \
|
||||
} \
|
||||
}
|
||||
|
||||
YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4);
|
||||
YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4);
|
||||
YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3);
|
||||
YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3);
|
||||
YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4);
|
||||
YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4);
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3);
|
||||
YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3);
|
||||
YUV444_FUNC(Yuv444ToArgb_SSE2, VP8YuvToArgb32_SSE2, WebPYuv444ToArgb_C, 4)
|
||||
YUV444_FUNC(Yuv444ToRgba4444_SSE2, VP8YuvToRgba444432_SSE2, \
|
||||
WebPYuv444ToRgba4444_C, 2)
|
||||
YUV444_FUNC(Yuv444ToRgb565_SSE2, VP8YuvToRgb56532_SSE2, WebPYuv444ToRgb565_C, 2)
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
|
||||
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
|
||||
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
|
||||
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb;
|
||||
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr;
|
||||
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba_SSE2;
|
||||
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra_SSE2;
|
||||
WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba_SSE2;
|
||||
WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra_SSE2;
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb_SSE2;
|
||||
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr_SSE2;
|
||||
WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb_SSE2;
|
||||
WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444_SSE2;
|
||||
WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565_SSE2;
|
||||
WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb_SSE2;
|
||||
WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444_SSE2;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// SSE41 version of YUV to RGB upsampling functions.
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include <assert.h>
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
|
||||
// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
|
||||
// u = (9*a + 3*b + 3*c + d + 8) / 16
|
||||
// = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
|
||||
// = (a + m + 1) / 2
|
||||
// where m = (a + 3*b + 3*c + d) / 8
|
||||
// = ((a + b + c + d) / 2 + b + c) / 4
|
||||
//
|
||||
// Let's say k = (a + b + c + d) / 4.
|
||||
// We can compute k as
|
||||
// k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1
|
||||
// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
|
||||
//
|
||||
// Then m can be written as
|
||||
// m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1
|
||||
|
||||
// Computes out = (k + in + 1) / 2 - ((ij & (s^t)) | (k^in)) & 1
|
||||
#define GET_M(ij, in, out) do { \
|
||||
const __m128i tmp0 = _mm_avg_epu8(k, (in)); /* (k + in + 1) / 2 */ \
|
||||
const __m128i tmp1 = _mm_and_si128((ij), st); /* (ij) & (s^t) */ \
|
||||
const __m128i tmp2 = _mm_xor_si128(k, (in)); /* (k^in) */ \
|
||||
const __m128i tmp3 = _mm_or_si128(tmp1, tmp2); /* ((ij) & (s^t)) | (k^in) */\
|
||||
const __m128i tmp4 = _mm_and_si128(tmp3, one); /* & 1 -> lsb_correction */ \
|
||||
(out) = _mm_sub_epi8(tmp0, tmp4); /* (k + in + 1) / 2 - lsb_correction */ \
|
||||
} while (0)
|
||||
|
||||
// pack and store two alternating pixel rows
|
||||
#define PACK_AND_STORE(a, b, da, db, out) do { \
|
||||
const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
|
||||
const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
|
||||
const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \
|
||||
const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \
|
||||
_mm_store_si128(((__m128i*)(out)) + 0, t_1); \
|
||||
_mm_store_si128(((__m128i*)(out)) + 1, t_2); \
|
||||
} while (0)
|
||||
|
||||
// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
|
||||
#define UPSAMPLE_32PIXELS(r1, r2, out) { \
|
||||
const __m128i one = _mm_set1_epi8(1); \
|
||||
const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]); \
|
||||
const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]); \
|
||||
const __m128i c = _mm_loadu_si128((const __m128i*)&(r2)[0]); \
|
||||
const __m128i d = _mm_loadu_si128((const __m128i*)&(r2)[1]); \
|
||||
\
|
||||
const __m128i s = _mm_avg_epu8(a, d); /* s = (a + d + 1) / 2 */ \
|
||||
const __m128i t = _mm_avg_epu8(b, c); /* t = (b + c + 1) / 2 */ \
|
||||
const __m128i st = _mm_xor_si128(s, t); /* st = s^t */ \
|
||||
\
|
||||
const __m128i ad = _mm_xor_si128(a, d); /* ad = a^d */ \
|
||||
const __m128i bc = _mm_xor_si128(b, c); /* bc = b^c */ \
|
||||
\
|
||||
const __m128i t1 = _mm_or_si128(ad, bc); /* (a^d) | (b^c) */ \
|
||||
const __m128i t2 = _mm_or_si128(t1, st); /* (a^d) | (b^c) | (s^t) */ \
|
||||
const __m128i t3 = _mm_and_si128(t2, one); /* (a^d) | (b^c) | (s^t) & 1 */ \
|
||||
const __m128i t4 = _mm_avg_epu8(s, t); \
|
||||
const __m128i k = _mm_sub_epi8(t4, t3); /* k = (a + b + c + d) / 4 */ \
|
||||
__m128i diag1, diag2; \
|
||||
\
|
||||
GET_M(bc, t, diag1); /* diag1 = (a + 3b + 3c + d) / 8 */ \
|
||||
GET_M(ad, s, diag2); /* diag2 = (3a + b + c + 3d) / 8 */ \
|
||||
\
|
||||
/* pack the alternate pixels */ \
|
||||
PACK_AND_STORE(a, b, diag1, diag2, (out) + 0); /* store top */ \
|
||||
PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32); /* store bottom */ \
|
||||
}
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \
|
||||
uint8_t r1[17], r2[17]; \
|
||||
memcpy(r1, (tb), (num_pixels)); \
|
||||
memcpy(r2, (bb), (num_pixels)); \
|
||||
/* replicate last byte */ \
|
||||
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels)); \
|
||||
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels)); \
|
||||
/* using the shared function instead of the macro saves ~3k code size */ \
|
||||
Upsample32Pixels_SSE41(r1, r2, out); \
|
||||
}
|
||||
|
||||
#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \
|
||||
top_dst, bottom_dst, cur_x) do { \
|
||||
FUNC##32_SSE41((top_y) + (cur_x), r_u, r_v, (top_dst) + (cur_x) * (XSTEP)); \
|
||||
if ((bottom_y) != NULL) { \
|
||||
FUNC##32_SSE41((bottom_y) + (cur_x), r_u + 64, r_v + 64, \
|
||||
(bottom_dst) + (cur_x) * (XSTEP)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
uint8_t* const r_v = r_u + 32; \
|
||||
\
|
||||
assert(top_y != NULL); \
|
||||
{ /* Treat the first pixel in regular way */ \
|
||||
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
|
||||
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
|
||||
const int u0_t = (top_u[0] + u_diag) >> 1; \
|
||||
const int v0_t = (top_v[0] + v_diag) >> 1; \
|
||||
FUNC(top_y[0], u0_t, v0_t, top_dst); \
|
||||
if (bottom_y != NULL) { \
|
||||
const int u0_b = (cur_u[0] + u_diag) >> 1; \
|
||||
const int v0_b = (cur_v[0] + v_diag) >> 1; \
|
||||
FUNC(bottom_y[0], u0_b, v0_b, bottom_dst); \
|
||||
} \
|
||||
} \
|
||||
/* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */ \
|
||||
for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) { \
|
||||
UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u); \
|
||||
UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v); \
|
||||
CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos); \
|
||||
} \
|
||||
if (len > 1) { \
|
||||
const int left_over = ((len + 1) >> 1) - (pos >> 1); \
|
||||
uint8_t* const tmp_top_dst = r_u + 4 * 32; \
|
||||
uint8_t* const tmp_bottom_dst = tmp_top_dst + 4 * 32; \
|
||||
uint8_t* const tmp_top = tmp_bottom_dst + 4 * 32; \
|
||||
uint8_t* const tmp_bottom = (bottom_y == NULL) ? NULL : tmp_top + 32; \
|
||||
assert(left_over > 0); \
|
||||
UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u); \
|
||||
UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v); \
|
||||
memcpy(tmp_top, top_y + pos, len - pos); \
|
||||
if (bottom_y != NULL) memcpy(tmp_bottom, bottom_y + pos, len - pos); \
|
||||
CONVERT2RGB_32(FUNC, XSTEP, tmp_top, tmp_bottom, tmp_top_dst, \
|
||||
tmp_bottom_dst, 0); \
|
||||
memcpy(top_dst + pos * (XSTEP), tmp_top_dst, (len - pos) * (XSTEP)); \
|
||||
if (bottom_y != NULL) { \
|
||||
memcpy(bottom_dst + pos * (XSTEP), tmp_bottom_dst, \
|
||||
(len - pos) * (XSTEP)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// SSE4 variants of the fancy upsampler.
|
||||
SSE4_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE41, VP8YuvToRgb, 3)
|
||||
SSE4_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE41, VP8YuvToBgr, 3)
|
||||
|
||||
#undef GET_M
|
||||
#undef PACK_AND_STORE
|
||||
#undef UPSAMPLE_32PIXELS
|
||||
#undef UPSAMPLE_LAST_BLOCK
|
||||
#undef CONVERT2RGB
|
||||
#undef CONVERT2RGB_32
|
||||
#undef SSE4_UPSAMPLE_FUNC
|
||||
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
|
||||
extern void WebPInitUpsamplersSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE41(void) {
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_SSE41;
|
||||
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_SSE41;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
CALL(y + i, u + i, v + i, dst + i * (XSTEP)); \
|
||||
} \
|
||||
if (i < len) { /* C-fallback */ \
|
||||
CALL_C(y + i, u + i, v + i, dst + i * (XSTEP), len - i); \
|
||||
} \
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3);
|
||||
YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3);
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE41(void) {
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb_SSE41;
|
||||
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr_SSE41;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE41)
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE41))
|
||||
WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE41)
|
||||
#endif
|
||||
+48
-77
@@ -11,63 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./yuv.h"
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(WEBP_YUV_USE_TABLE)
|
||||
|
||||
static int done = 0;
|
||||
|
||||
static WEBP_INLINE uint8_t clip(int v, int max_value) {
|
||||
return v < 0 ? 0 : v > max_value ? max_value : v;
|
||||
}
|
||||
|
||||
int16_t VP8kVToR[256], VP8kUToB[256];
|
||||
int32_t VP8kVToG[256], VP8kUToG[256];
|
||||
uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
|
||||
uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
|
||||
int i;
|
||||
if (done) {
|
||||
return;
|
||||
}
|
||||
#ifndef USE_YUVj
|
||||
for (i = 0; i < 256; ++i) {
|
||||
VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
|
||||
VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
|
||||
VP8kVToG[i] = -45773 * (i - 128);
|
||||
VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
|
||||
}
|
||||
for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
|
||||
const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
|
||||
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
|
||||
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < 256; ++i) {
|
||||
VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
|
||||
VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
|
||||
VP8kVToG[i] = -46802 * (i - 128);
|
||||
VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
|
||||
}
|
||||
for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
|
||||
const int k = i;
|
||||
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
|
||||
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
|
||||
}
|
||||
#endif
|
||||
|
||||
done = 1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
|
||||
|
||||
#endif // WEBP_YUV_USE_TABLE
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Plain-C version
|
||||
|
||||
@@ -75,14 +23,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
const uint8_t* const end = dst + (len & ~1) * XSTEP; \
|
||||
const uint8_t* const end = dst + (len & ~1) * (XSTEP); \
|
||||
while (dst != end) { \
|
||||
FUNC(y[0], u[0], v[0], dst); \
|
||||
FUNC(y[1], u[0], v[0], dst + XSTEP); \
|
||||
FUNC(y[1], u[0], v[0], dst + (XSTEP)); \
|
||||
y += 2; \
|
||||
++u; \
|
||||
++v; \
|
||||
dst += 2 * XSTEP; \
|
||||
dst += 2 * (XSTEP); \
|
||||
} \
|
||||
if (len & 1) { \
|
||||
FUNC(y[0], u[0], v[0], dst); \
|
||||
@@ -123,15 +71,11 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
WebPSamplerRowFunc WebPSamplers[MODE_LAST];
|
||||
|
||||
extern void WebPInitSamplersSSE2(void);
|
||||
extern void WebPInitSamplersSSE41(void);
|
||||
extern void WebPInitSamplersMIPS32(void);
|
||||
extern void WebPInitSamplersMIPSdspR2(void);
|
||||
|
||||
static volatile VP8CPUInfo yuv_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&yuv_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
|
||||
if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
|
||||
WebPSamplers[MODE_RGB] = YuvToRgbRow;
|
||||
WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
|
||||
WebPSamplers[MODE_BGR] = YuvToBgrRow;
|
||||
@@ -151,6 +95,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
|
||||
WebPInitSamplersSSE2();
|
||||
}
|
||||
#endif // WEBP_USE_SSE2
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitSamplersSSE41();
|
||||
}
|
||||
#endif // WEBP_USE_SSE41
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
WebPInitSamplersMIPS32();
|
||||
@@ -162,13 +111,12 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
|
||||
}
|
||||
#endif // WEBP_USE_MIPS_DSP_R2
|
||||
}
|
||||
yuv_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// ARGB -> YUV converters
|
||||
|
||||
static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
const uint32_t p = argb[i];
|
||||
@@ -220,14 +168,14 @@ void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, rgb += 3) {
|
||||
y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, bgr += 3) {
|
||||
y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
|
||||
@@ -246,6 +194,7 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
@@ -283,6 +232,7 @@ static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
@@ -304,26 +254,26 @@ void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
|
||||
void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
static volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used;
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
extern void WebPInitConvertARGBToYUVSSE41(void);
|
||||
extern void WebPInitConvertARGBToYUVNEON(void);
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
|
||||
if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPConvertARGBToY = ConvertARGBToY;
|
||||
WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
WebPConvertARGBToY = ConvertARGBToY_C;
|
||||
WebPConvertARGBToUV = WebPConvertARGBToUV_C;
|
||||
|
||||
WebPConvertRGB24ToY = ConvertRGB24ToY;
|
||||
WebPConvertBGR24ToY = ConvertBGR24ToY;
|
||||
WebPConvertRGB24ToY = ConvertRGB24ToY_C;
|
||||
WebPConvertBGR24ToY = ConvertBGR24ToY_C;
|
||||
|
||||
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@@ -332,6 +282,27 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
|
||||
WebPInitSharpYUVSSE2();
|
||||
}
|
||||
#endif // WEBP_USE_SSE2
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitConvertARGBToYUVSSE41();
|
||||
}
|
||||
#endif // WEBP_USE_SSE41
|
||||
}
|
||||
rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitConvertARGBToYUVNEON();
|
||||
WebPInitSharpYUVNEON();
|
||||
}
|
||||
#endif // WEBP_USE_NEON
|
||||
|
||||
assert(WebPConvertARGBToY != NULL);
|
||||
assert(WebPConvertARGBToUV != NULL);
|
||||
assert(WebPConvertRGB24ToY != NULL);
|
||||
assert(WebPConvertBGR24ToY != NULL);
|
||||
assert(WebPConvertRGBA32ToUV != NULL);
|
||||
assert(WebPSharpYUVUpdateY != NULL);
|
||||
assert(WebPSharpYUVUpdateRGB != NULL);
|
||||
assert(WebPSharpYUVFilterRow != NULL);
|
||||
}
|
||||
|
||||
+30
-58
@@ -35,19 +35,9 @@
|
||||
#ifndef WEBP_DSP_YUV_H_
|
||||
#define WEBP_DSP_YUV_H_
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dec/vp8_dec.h"
|
||||
|
||||
#if defined(WEBP_EXPERIMENTAL_FEATURES)
|
||||
// Do NOT activate this feature for real compression. This is only experimental!
|
||||
// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
|
||||
// This colorspace is close to Rec.601's Y'CbCr model with the notable
|
||||
// difference of allowing larger range for luma/chroma.
|
||||
// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
|
||||
// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
|
||||
// #define USE_YUVj
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// YUV -> RGB conversion
|
||||
|
||||
@@ -58,12 +48,8 @@ extern "C" {
|
||||
enum {
|
||||
YUV_FIX = 16, // fixed-point precision for RGB->YUV
|
||||
YUV_HALF = 1 << (YUV_FIX - 1),
|
||||
YUV_MASK = (256 << YUV_FIX) - 1,
|
||||
YUV_RANGE_MIN = -227, // min value of r/g/b output
|
||||
YUV_RANGE_MAX = 256 + 226, // max value of r/g/b output
|
||||
|
||||
YUV_FIX2 = 6, // fixed-point precision for YUV->RGB
|
||||
YUV_HALF2 = 1 << YUV_FIX2 >> 1,
|
||||
YUV_MASK2 = (256 << YUV_FIX2) - 1
|
||||
};
|
||||
|
||||
@@ -111,7 +97,7 @@ static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
|
||||
const int b = VP8YUVToB(y, u); // 5 usable bits
|
||||
const int rg = (r & 0xf8) | (g >> 5);
|
||||
const int gb = ((g << 3) & 0xe0) | (b >> 3);
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
rgb[0] = gb;
|
||||
rgb[1] = rg;
|
||||
#else
|
||||
@@ -127,7 +113,7 @@ static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
|
||||
const int b = VP8YUVToB(y, u); // 4 usable bits
|
||||
const int rg = (r & 0xf0) | (g >> 4);
|
||||
const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
argb[0] = ba;
|
||||
argb[1] = rg;
|
||||
#else
|
||||
@@ -157,32 +143,42 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
rgba[3] = 0xff;
|
||||
}
|
||||
|
||||
// Must be called before everything, to initialize the tables.
|
||||
void VP8YUVInit(void);
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// SSE2 extra functions (mostly for upsampling_sse2.c)
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// SSE41 extra functions (mostly for upsampling_sse41.c)
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RGB -> YUV conversion
|
||||
|
||||
@@ -192,8 +188,6 @@ static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
|
||||
return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
|
||||
}
|
||||
|
||||
#ifndef USE_YUVj
|
||||
|
||||
static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
|
||||
const int luma = 16839 * r + 33059 * g + 6420 * b;
|
||||
return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX; // no need to clip
|
||||
@@ -209,30 +203,8 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
|
||||
return VP8ClipUV(v, rounding);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// This JPEG-YUV colorspace, only for comparison!
|
||||
// These are also 16bit precision coefficients from Rec.601, but with full
|
||||
// [0..255] output range.
|
||||
static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
|
||||
const int luma = 19595 * r + 38470 * g + 7471 * b;
|
||||
return (luma + rounding) >> YUV_FIX; // no need to clip
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
|
||||
const int u = -11058 * r - 21710 * g + 32768 * b;
|
||||
return VP8ClipUV(u, rounding);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
|
||||
const int v = 32768 * r - 27439 * g - 5329 * b;
|
||||
return VP8ClipUV(v, rounding);
|
||||
}
|
||||
|
||||
#endif // USE_YUVj
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DSP_YUV_H_ */
|
||||
#endif // WEBP_DSP_YUV_H_
|
||||
|
||||
+191
-180
@@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./yuv.h"
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include "./common_sse2.h"
|
||||
#include "../dsp/common_sse2.h"
|
||||
#include <stdlib.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
@@ -26,12 +26,12 @@
|
||||
// R = (19077 * y + 26149 * v - 14234) >> 6
|
||||
// G = (19077 * y - 6419 * u - 13320 * v + 8708) >> 6
|
||||
// B = (19077 * y + 33050 * u - 17685) >> 6
|
||||
static void ConvertYUV444ToRGB(const __m128i* const Y0,
|
||||
const __m128i* const U0,
|
||||
const __m128i* const V0,
|
||||
__m128i* const R,
|
||||
__m128i* const G,
|
||||
__m128i* const B) {
|
||||
static void ConvertYUV444ToRGB_SSE2(const __m128i* const Y0,
|
||||
const __m128i* const U0,
|
||||
const __m128i* const V0,
|
||||
__m128i* const R,
|
||||
__m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i k19077 = _mm_set1_epi16(19077);
|
||||
const __m128i k26149 = _mm_set1_epi16(26149);
|
||||
const __m128i k14234 = _mm_set1_epi16(14234);
|
||||
@@ -66,13 +66,13 @@ static void ConvertYUV444ToRGB(const __m128i* const Y0,
|
||||
}
|
||||
|
||||
// Load the bytes into the *upper* part of 16b words. That's "<< 8", basically.
|
||||
static WEBP_INLINE __m128i Load_HI_16(const uint8_t* src) {
|
||||
static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi8(zero, _mm_loadl_epi64((const __m128i*)src));
|
||||
}
|
||||
|
||||
// Load and replicate the U/V samples
|
||||
static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) {
|
||||
static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
|
||||
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
|
||||
@@ -80,29 +80,33 @@ static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) {
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G, __m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16(y), U0 = Load_HI_16(u), V0 = Load_HI_16(v);
|
||||
ConvertYUV444ToRGB(&Y0, &U0, &V0, R, G, B);
|
||||
static void YUV444ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_HI_16_SSE2(u),
|
||||
V0 = Load_HI_16_SSE2(v);
|
||||
ConvertYUV444ToRGB_SSE2(&Y0, &U0, &V0, R, G, B);
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G, __m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16(y), U0 = Load_UV_HI_8(u), V0 = Load_UV_HI_8(v);
|
||||
ConvertYUV444ToRGB(&Y0, &U0, &V0, R, G, B);
|
||||
static void YUV420ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_UV_HI_8_SSE2(u),
|
||||
V0 = Load_UV_HI_8_SSE2(v);
|
||||
ConvertYUV444ToRGB_SSE2(&Y0, &U0, &V0, R, G, B);
|
||||
}
|
||||
|
||||
// Pack R/G/B/A results into 32b output.
|
||||
static WEBP_INLINE void PackAndStore4(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
const __m128i rb = _mm_packus_epi16(*R, *B);
|
||||
const __m128i ga = _mm_packus_epi16(*G, *A);
|
||||
const __m128i rg = _mm_unpacklo_epi8(rb, ga);
|
||||
@@ -114,12 +118,12 @@ static WEBP_INLINE void PackAndStore4(const __m128i* const R,
|
||||
}
|
||||
|
||||
// Pack R/G/B/A results into 16b output.
|
||||
static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 0)
|
||||
const __m128i rg0 = _mm_packus_epi16(*R, *G);
|
||||
const __m128i ba0 = _mm_packus_epi16(*B, *A);
|
||||
#else
|
||||
@@ -136,10 +140,10 @@ static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
|
||||
}
|
||||
|
||||
// Pack R/G/B results into 16b output.
|
||||
static WEBP_INLINE void PackAndStore565(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
uint8_t* const dst) {
|
||||
static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
uint8_t* const dst) {
|
||||
const __m128i r0 = _mm_packus_epi16(*R, *R);
|
||||
const __m128i g0 = _mm_packus_epi16(*G, *G);
|
||||
const __m128i b0 = _mm_packus_epi16(*B, *B);
|
||||
@@ -149,7 +153,7 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
|
||||
const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
|
||||
const __m128i rg = _mm_or_si128(r1, g1);
|
||||
const __m128i gb = _mm_or_si128(g2, b1);
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#if (WEBP_SWAP_16BIT_CSP == 0)
|
||||
const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb);
|
||||
#else
|
||||
const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg);
|
||||
@@ -160,10 +164,10 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
|
||||
// Pack the planar buffers
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
|
||||
static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
|
||||
__m128i* const in2, __m128i* const in3,
|
||||
__m128i* const in4, __m128i* const in5,
|
||||
uint8_t* const rgb) {
|
||||
static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
|
||||
__m128i* const in2, __m128i* const in3,
|
||||
__m128i* const in4, __m128i* const in5,
|
||||
uint8_t* const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@@ -176,7 +180,7 @@ static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
|
||||
// Repeat the same permutations twice more:
|
||||
// r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7
|
||||
// r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7
|
||||
VP8PlanarTo24b(in0, in1, in2, in3, in4, in5);
|
||||
VP8PlanarTo24b_SSE2(in0, in1, in2, in3, in4, in5);
|
||||
|
||||
_mm_storeu_si128((__m128i*)(rgb + 0), *in0);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 16), *in1);
|
||||
@@ -186,69 +190,69 @@ static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4(&R, &G, &B, &kAlpha, dst);
|
||||
YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&R, &G, &B, &kAlpha, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4(&B, &G, &R, &kAlpha, dst);
|
||||
YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&B, &G, &R, &kAlpha, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4(&kAlpha, &R, &G, &B, dst);
|
||||
YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&kAlpha, &R, &G, &B, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4444_SSE2(&R, &G, &B, &kAlpha, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore565_SSE2(&R, &G, &B, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore4444(&R, &G, &B, &kAlpha, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
|
||||
PackAndStore565(&R, &G, &B, dst);
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
YUV444ToRGB(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
YUV444ToRGB_SSE2(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB_SSE2(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB_SSE2(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB_SSE2(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as RRRRGGGGBBBB.
|
||||
rgb0 = _mm_packus_epi16(R0, R1);
|
||||
@@ -259,18 +263,18 @@ void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
rgb5 = _mm_packus_epi16(B2, B3);
|
||||
|
||||
// Pack as RGBRGBRGBRGB.
|
||||
PlanarTo24b(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
YUV444ToRGB(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
YUV444ToRGB_SSE2(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB_SSE2(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB_SSE2(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB_SSE2(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as BBBBGGGGRRRR.
|
||||
bgr0 = _mm_packus_epi16(B0, B1);
|
||||
@@ -281,20 +285,21 @@ void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
bgr5= _mm_packus_epi16(R2, R3);
|
||||
|
||||
// Pack as BGRBGRBGRBGR.
|
||||
PlanarTo24b(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
PlanarTo24b_SSE2(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToRgbaRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV420ToRGB(y, u, v, &R, &G, &B);
|
||||
PackAndStore4(&R, &G, &B, &kAlpha, dst);
|
||||
YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&R, &G, &B, &kAlpha, dst);
|
||||
y += 8;
|
||||
u += 4;
|
||||
v += 4;
|
||||
@@ -308,14 +313,15 @@ static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToBgraRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV420ToRGB(y, u, v, &R, &G, &B);
|
||||
PackAndStore4(&B, &G, &R, &kAlpha, dst);
|
||||
YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&B, &G, &R, &kAlpha, dst);
|
||||
y += 8;
|
||||
u += 4;
|
||||
v += 4;
|
||||
@@ -329,14 +335,15 @@ static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToArgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
__m128i R, G, B;
|
||||
YUV420ToRGB(y, u, v, &R, &G, &B);
|
||||
PackAndStore4(&kAlpha, &R, &G, &B, dst);
|
||||
YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
|
||||
PackAndStore4_SSE2(&kAlpha, &R, &G, &B, dst);
|
||||
y += 8;
|
||||
u += 4;
|
||||
v += 4;
|
||||
@@ -350,17 +357,18 @@ static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToRgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
YUV420ToRGB(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
YUV420ToRGB_SSE2(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB_SSE2(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB_SSE2(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB_SSE2(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as RRRRGGGGBBBB.
|
||||
rgb0 = _mm_packus_epi16(R0, R1);
|
||||
@@ -371,7 +379,7 @@ static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
rgb5 = _mm_packus_epi16(B2, B3);
|
||||
|
||||
// Pack as RGBRGBRGBRGB.
|
||||
PlanarTo24b(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
|
||||
y += 32;
|
||||
u += 16;
|
||||
@@ -386,17 +394,18 @@ static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToBgrRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
YUV420ToRGB(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
YUV420ToRGB_SSE2(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB_SSE2(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB_SSE2(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB_SSE2(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as BBBBGGGGRRRR.
|
||||
bgr0 = _mm_packus_epi16(B0, B1);
|
||||
@@ -407,7 +416,7 @@ static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
bgr5 = _mm_packus_epi16(R2, R3);
|
||||
|
||||
// Pack as BGRBGRBGRBGR.
|
||||
PlanarTo24b(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
PlanarTo24b_SSE2(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
|
||||
y += 32;
|
||||
u += 16;
|
||||
@@ -428,11 +437,11 @@ static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
extern void WebPInitSamplersSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {
|
||||
WebPSamplers[MODE_RGB] = YuvToRgbRow;
|
||||
WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
|
||||
WebPSamplers[MODE_BGR] = YuvToBgrRow;
|
||||
WebPSamplers[MODE_BGRA] = YuvToBgraRow;
|
||||
WebPSamplers[MODE_ARGB] = YuvToArgbRow;
|
||||
WebPSamplers[MODE_RGB] = YuvToRgbRow_SSE2;
|
||||
WebPSamplers[MODE_RGBA] = YuvToRgbaRow_SSE2;
|
||||
WebPSamplers[MODE_BGR] = YuvToBgrRow_SSE2;
|
||||
WebPSamplers[MODE_BGRA] = YuvToBgraRow_SSE2;
|
||||
WebPSamplers[MODE_ARGB] = YuvToArgbRow_SSE2;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -445,7 +454,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {
|
||||
|
||||
// Function that inserts a value of the second half of the in buffer in between
|
||||
// every two char of the first half.
|
||||
static WEBP_INLINE void RGB24PackedToPlanarHelper(
|
||||
static WEBP_INLINE void RGB24PackedToPlanarHelper_SSE2(
|
||||
const __m128i* const in /*in[6]*/, __m128i* const out /*out[6]*/) {
|
||||
out[0] = _mm_unpacklo_epi8(in[0], in[3]);
|
||||
out[1] = _mm_unpackhi_epi8(in[0], in[3]);
|
||||
@@ -458,8 +467,8 @@ static WEBP_INLINE void RGB24PackedToPlanarHelper(
|
||||
// Unpack the 8b input rgbrgbrgbrgb ... as contiguous registers:
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb,
|
||||
__m128i* const out /*out[6]*/) {
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
__m128i tmp[6];
|
||||
tmp[0] = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
@@ -468,22 +477,22 @@ static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb,
|
||||
tmp[4] = _mm_loadu_si128((const __m128i*)(rgb + 64));
|
||||
tmp[5] = _mm_loadu_si128((const __m128i*)(rgb + 80));
|
||||
|
||||
RGB24PackedToPlanarHelper(tmp, out);
|
||||
RGB24PackedToPlanarHelper(out, tmp);
|
||||
RGB24PackedToPlanarHelper(tmp, out);
|
||||
RGB24PackedToPlanarHelper(out, tmp);
|
||||
RGB24PackedToPlanarHelper(tmp, out);
|
||||
RGB24PackedToPlanarHelper_SSE2(tmp, out);
|
||||
RGB24PackedToPlanarHelper_SSE2(out, tmp);
|
||||
RGB24PackedToPlanarHelper_SSE2(tmp, out);
|
||||
RGB24PackedToPlanarHelper_SSE2(out, tmp);
|
||||
RGB24PackedToPlanarHelper_SSE2(tmp, out);
|
||||
}
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb,
|
||||
__m128i* const rgb /*in[6]*/) {
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb,
|
||||
__m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
__m128i a2 = LOAD_16(argb + 8);
|
||||
__m128i a3 = LOAD_16(argb + 12);
|
||||
VP8L32bToPlanar(&a0, &a1, &a2, &a3);
|
||||
VP8L32bToPlanar_SSE2(&a0, &a1, &a2, &a3);
|
||||
rgb[0] = _mm_unpacklo_epi8(a1, zero);
|
||||
rgb[1] = _mm_unpackhi_epi8(a1, zero);
|
||||
rgb[2] = _mm_unpacklo_epi8(a2, zero);
|
||||
@@ -511,10 +520,10 @@ static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb,
|
||||
} while (0)
|
||||
|
||||
#define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A))
|
||||
static WEBP_INLINE void ConvertRGBToY(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const Y) {
|
||||
static WEBP_INLINE void ConvertRGBToY_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const Y) {
|
||||
const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384);
|
||||
const __m128i kGB_y = MK_CST_16(16384, 6420);
|
||||
const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF);
|
||||
@@ -526,10 +535,11 @@ static WEBP_INLINE void ConvertRGBToY(const __m128i* const R,
|
||||
TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const U, __m128i* const V) {
|
||||
static WEBP_INLINE void ConvertRGBToUV_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const U,
|
||||
__m128i* const V) {
|
||||
const __m128i kRG_u = MK_CST_16(-9719, -19081);
|
||||
const __m128i kGB_u = MK_CST_16(0, 28800);
|
||||
const __m128i kRG_v = MK_CST_16(28800, 0);
|
||||
@@ -549,14 +559,14 @@ static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R,
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
__m128i rgb_plane[6];
|
||||
int j;
|
||||
|
||||
RGB24PackedToPlanar(rgb, rgb_plane);
|
||||
RGB24PackedToPlanar_SSE2(rgb, rgb_plane);
|
||||
|
||||
for (j = 0; j < 2; ++j, i += 16) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@@ -566,13 +576,13 @@ static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
r = _mm_unpacklo_epi8(rgb_plane[0 + j], zero);
|
||||
g = _mm_unpacklo_epi8(rgb_plane[2 + j], zero);
|
||||
b = _mm_unpacklo_epi8(rgb_plane[4 + j], zero);
|
||||
ConvertRGBToY(&r, &g, &b, &Y0);
|
||||
ConvertRGBToY_SSE2(&r, &g, &b, &Y0);
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
r = _mm_unpackhi_epi8(rgb_plane[0 + j], zero);
|
||||
g = _mm_unpackhi_epi8(rgb_plane[2 + j], zero);
|
||||
b = _mm_unpackhi_epi8(rgb_plane[4 + j], zero);
|
||||
ConvertRGBToY(&r, &g, &b, &Y1);
|
||||
ConvertRGBToY_SSE2(&r, &g, &b, &Y1);
|
||||
|
||||
// Cast to 8-bit and store.
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
@@ -583,14 +593,14 @@ static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
__m128i bgr_plane[6];
|
||||
int j;
|
||||
|
||||
RGB24PackedToPlanar(bgr, bgr_plane);
|
||||
RGB24PackedToPlanar_SSE2(bgr, bgr_plane);
|
||||
|
||||
for (j = 0; j < 2; ++j, i += 16) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@@ -600,13 +610,13 @@ static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero);
|
||||
g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero);
|
||||
r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero);
|
||||
ConvertRGBToY(&r, &g, &b, &Y0);
|
||||
ConvertRGBToY_SSE2(&r, &g, &b, &Y0);
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero);
|
||||
g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero);
|
||||
r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero);
|
||||
ConvertRGBToY(&r, &g, &b, &Y1);
|
||||
ConvertRGBToY_SSE2(&r, &g, &b, &Y1);
|
||||
|
||||
// Cast to 8-bit and store.
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
@@ -617,14 +627,14 @@ static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_SSE2(const uint32_t* argb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
__m128i Y0, Y1, rgb[6];
|
||||
RGB32PackedToPlanar(&argb[i], rgb);
|
||||
ConvertRGBToY(&rgb[0], &rgb[2], &rgb[4], &Y0);
|
||||
ConvertRGBToY(&rgb[1], &rgb[3], &rgb[5], &Y1);
|
||||
RGB32PackedToPlanar_SSE2(&argb[i], rgb);
|
||||
ConvertRGBToY_SSE2(&rgb[0], &rgb[2], &rgb[4], &Y0);
|
||||
ConvertRGBToY_SSE2(&rgb[1], &rgb[3], &rgb[5], &Y1);
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
}
|
||||
for (; i < width; ++i) { // left-over
|
||||
@@ -636,31 +646,33 @@ static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
|
||||
|
||||
// Horizontal add (doubled) of two 16b values, result is 16b.
|
||||
// in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ...
|
||||
static void HorizontalAddPack(const __m128i* const A, const __m128i* const B,
|
||||
__m128i* const out) {
|
||||
static void HorizontalAddPack_SSE2(const __m128i* const A,
|
||||
const __m128i* const B,
|
||||
__m128i* const out) {
|
||||
const __m128i k2 = _mm_set1_epi16(2);
|
||||
const __m128i C = _mm_madd_epi16(*A, k2);
|
||||
const __m128i D = _mm_madd_epi16(*B, k2);
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
static void ConvertARGBToUV_SSE2(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 32, u += 16, v += 16) {
|
||||
__m128i rgb[6], U0, V0, U1, V1;
|
||||
RGB32PackedToPlanar(&argb[i], rgb);
|
||||
HorizontalAddPack(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV(&rgb[0], &rgb[2], &rgb[4], &U0, &V0);
|
||||
RGB32PackedToPlanar_SSE2(&argb[i], rgb);
|
||||
HorizontalAddPack_SSE2(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack_SSE2(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack_SSE2(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV_SSE2(&rgb[0], &rgb[2], &rgb[4], &U0, &V0);
|
||||
|
||||
RGB32PackedToPlanar(&argb[i + 16], rgb);
|
||||
HorizontalAddPack(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV(&rgb[0], &rgb[2], &rgb[4], &U1, &V1);
|
||||
RGB32PackedToPlanar_SSE2(&argb[i + 16], rgb);
|
||||
HorizontalAddPack_SSE2(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack_SSE2(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack_SSE2(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV_SSE2(&rgb[0], &rgb[2], &rgb[4], &U1, &V1);
|
||||
|
||||
U0 = _mm_packus_epi16(U0, U1);
|
||||
V0 = _mm_packus_epi16(V0, V1);
|
||||
@@ -679,10 +691,9 @@ static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
}
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
|
||||
__m128i* const r,
|
||||
__m128i* const g,
|
||||
__m128i* const b) {
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
|
||||
const uint16_t* const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
const __m128i in2 = LOAD_16(rgbx + 16); // r4 | ...
|
||||
@@ -701,16 +712,16 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
|
||||
*b = _mm_unpacklo_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
static void ConvertRGBA32ToUV_SSE2(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
__m128i r, g, b, U0, V0, U1, V1;
|
||||
RGBA32PackedToPlanar_16b(rgb + 0, &r, &g, &b);
|
||||
ConvertRGBToUV(&r, &g, &b, &U0, &V0);
|
||||
RGBA32PackedToPlanar_16b(rgb + 32, &r, &g, &b);
|
||||
ConvertRGBToUV(&r, &g, &b, &U1, &V1);
|
||||
RGBA32PackedToPlanar_16b_SSE2(rgb + 0, &r, &g, &b);
|
||||
ConvertRGBToUV_SSE2(&r, &g, &b, &U0, &V0);
|
||||
RGBA32PackedToPlanar_16b_SSE2(rgb + 32, &r, &g, &b);
|
||||
ConvertRGBToUV_SSE2(&r, &g, &b, &U1, &V1);
|
||||
STORE_16(_mm_packus_epi16(U0, U1), u);
|
||||
STORE_16(_mm_packus_epi16(V0, V1), v);
|
||||
u += 16;
|
||||
@@ -727,13 +738,13 @@ static void ConvertRGBA32ToUV(const uint16_t* rgb,
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
|
||||
WebPConvertARGBToY = ConvertARGBToY;
|
||||
WebPConvertARGBToUV = ConvertARGBToUV;
|
||||
WebPConvertARGBToY = ConvertARGBToY_SSE2;
|
||||
WebPConvertARGBToUV = ConvertARGBToUV_SSE2;
|
||||
|
||||
WebPConvertRGB24ToY = ConvertRGB24ToY;
|
||||
WebPConvertBGR24ToY = ConvertBGR24ToY;
|
||||
WebPConvertRGB24ToY = ConvertRGB24ToY_SSE2;
|
||||
WebPConvertBGR24ToY = ConvertBGR24ToY_SSE2;
|
||||
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV;
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,613 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// YUV->RGB conversion functions
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "../dsp/yuv.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include "../dsp/common_sse41.h"
|
||||
#include <stdlib.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
|
||||
|
||||
// These constants are 14b fixed-point version of ITU-R BT.601 constants.
|
||||
// R = (19077 * y + 26149 * v - 14234) >> 6
|
||||
// G = (19077 * y - 6419 * u - 13320 * v + 8708) >> 6
|
||||
// B = (19077 * y + 33050 * u - 17685) >> 6
|
||||
static void ConvertYUV444ToRGB_SSE41(const __m128i* const Y0,
|
||||
const __m128i* const U0,
|
||||
const __m128i* const V0,
|
||||
__m128i* const R,
|
||||
__m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i k19077 = _mm_set1_epi16(19077);
|
||||
const __m128i k26149 = _mm_set1_epi16(26149);
|
||||
const __m128i k14234 = _mm_set1_epi16(14234);
|
||||
// 33050 doesn't fit in a signed short: only use this with unsigned arithmetic
|
||||
const __m128i k33050 = _mm_set1_epi16((short)33050);
|
||||
const __m128i k17685 = _mm_set1_epi16(17685);
|
||||
const __m128i k6419 = _mm_set1_epi16(6419);
|
||||
const __m128i k13320 = _mm_set1_epi16(13320);
|
||||
const __m128i k8708 = _mm_set1_epi16(8708);
|
||||
|
||||
const __m128i Y1 = _mm_mulhi_epu16(*Y0, k19077);
|
||||
|
||||
const __m128i R0 = _mm_mulhi_epu16(*V0, k26149);
|
||||
const __m128i R1 = _mm_sub_epi16(Y1, k14234);
|
||||
const __m128i R2 = _mm_add_epi16(R1, R0);
|
||||
|
||||
const __m128i G0 = _mm_mulhi_epu16(*U0, k6419);
|
||||
const __m128i G1 = _mm_mulhi_epu16(*V0, k13320);
|
||||
const __m128i G2 = _mm_add_epi16(Y1, k8708);
|
||||
const __m128i G3 = _mm_add_epi16(G0, G1);
|
||||
const __m128i G4 = _mm_sub_epi16(G2, G3);
|
||||
|
||||
// be careful with the saturated *unsigned* arithmetic here!
|
||||
const __m128i B0 = _mm_mulhi_epu16(*U0, k33050);
|
||||
const __m128i B1 = _mm_adds_epu16(B0, Y1);
|
||||
const __m128i B2 = _mm_subs_epu16(B1, k17685);
|
||||
|
||||
// use logical shift for B2, which can be larger than 32767
|
||||
*R = _mm_srai_epi16(R2, 6); // range: [-14234, 30815]
|
||||
*G = _mm_srai_epi16(G4, 6); // range: [-10953, 27710]
|
||||
*B = _mm_srli_epi16(B2, 6); // range: [0, 34238]
|
||||
}
|
||||
|
||||
// Load the bytes into the *upper* part of 16b words. That's "<< 8", basically.
|
||||
static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi8(zero, _mm_loadl_epi64((const __m128i*)src));
|
||||
}
|
||||
|
||||
// Load and replicate the U/V samples
|
||||
static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
|
||||
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
|
||||
return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_HI_16_SSE41(u),
|
||||
V0 = Load_HI_16_SSE41(v);
|
||||
ConvertYUV444ToRGB_SSE41(&Y0, &U0, &V0, R, G, B);
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_UV_HI_8_SSE41(u),
|
||||
V0 = Load_UV_HI_8_SSE41(v);
|
||||
ConvertYUV444ToRGB_SSE41(&Y0, &U0, &V0, R, G, B);
|
||||
}
|
||||
|
||||
// Pack the planar buffers
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
|
||||
static WEBP_INLINE void PlanarTo24b_SSE41(
|
||||
__m128i* const in0, __m128i* const in1, __m128i* const in2,
|
||||
__m128i* const in3, __m128i* const in4, __m128i* const in5,
|
||||
uint8_t* const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
// around as follows:
|
||||
// Input:
|
||||
// r0r1r2r3 | r4r5r6r7 | g0g1g2g3 | g4g5g6g7 | b0b1b2b3 | b4b5b6b7
|
||||
// Split the 6 registers in two sets of 3 registers: the first set as the even
|
||||
// 8b bytes, the second the odd ones:
|
||||
// r0r2r4r6 | g0g2g4g6 | b0b2b4b6 | r1r3r5r7 | g1g3g5g7 | b1b3b5b7
|
||||
// Repeat the same permutations twice more:
|
||||
// r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7
|
||||
// r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7
|
||||
VP8PlanarTo24b_SSE41(in0, in1, in2, in3, in4, in5);
|
||||
|
||||
_mm_storeu_si128((__m128i*)(rgb + 0), *in0);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 16), *in1);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 32), *in2);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 48), *in3);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 64), *in4);
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
YUV444ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB_SSE41(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB_SSE41(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB_SSE41(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as RRRRGGGGBBBB.
|
||||
rgb0 = _mm_packus_epi16(R0, R1);
|
||||
rgb1 = _mm_packus_epi16(R2, R3);
|
||||
rgb2 = _mm_packus_epi16(G0, G1);
|
||||
rgb3 = _mm_packus_epi16(G2, G3);
|
||||
rgb4 = _mm_packus_epi16(B0, B1);
|
||||
rgb5 = _mm_packus_epi16(B2, B3);
|
||||
|
||||
// Pack as RGBRGBRGBRGB.
|
||||
PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
YUV444ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV444ToRGB_SSE41(y + 8, u + 8, v + 8, &R1, &G1, &B1);
|
||||
YUV444ToRGB_SSE41(y + 16, u + 16, v + 16, &R2, &G2, &B2);
|
||||
YUV444ToRGB_SSE41(y + 24, u + 24, v + 24, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as BBBBGGGGRRRR.
|
||||
bgr0 = _mm_packus_epi16(B0, B1);
|
||||
bgr1 = _mm_packus_epi16(B2, B3);
|
||||
bgr2 = _mm_packus_epi16(G0, G1);
|
||||
bgr3 = _mm_packus_epi16(G2, G3);
|
||||
bgr4 = _mm_packus_epi16(R0, R1);
|
||||
bgr5= _mm_packus_epi16(R2, R3);
|
||||
|
||||
// Pack as BGRBGRBGRBGR.
|
||||
PlanarTo24b_SSE41(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
YUV420ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB_SSE41(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB_SSE41(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB_SSE41(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as RRRRGGGGBBBB.
|
||||
rgb0 = _mm_packus_epi16(R0, R1);
|
||||
rgb1 = _mm_packus_epi16(R2, R3);
|
||||
rgb2 = _mm_packus_epi16(G0, G1);
|
||||
rgb3 = _mm_packus_epi16(G2, G3);
|
||||
rgb4 = _mm_packus_epi16(B0, B1);
|
||||
rgb5 = _mm_packus_epi16(B2, B3);
|
||||
|
||||
// Pack as RGBRGBRGBRGB.
|
||||
PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
|
||||
y += 32;
|
||||
u += 16;
|
||||
v += 16;
|
||||
}
|
||||
for (; n < len; ++n) { // Finish off
|
||||
VP8YuvToRgb(y[0], u[0], v[0], dst);
|
||||
dst += 3;
|
||||
y += 1;
|
||||
u += (n & 1);
|
||||
v += (n & 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
YUV420ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0);
|
||||
YUV420ToRGB_SSE41(y + 8, u + 4, v + 4, &R1, &G1, &B1);
|
||||
YUV420ToRGB_SSE41(y + 16, u + 8, v + 8, &R2, &G2, &B2);
|
||||
YUV420ToRGB_SSE41(y + 24, u + 12, v + 12, &R3, &G3, &B3);
|
||||
|
||||
// Cast to 8b and store as BBBBGGGGRRRR.
|
||||
bgr0 = _mm_packus_epi16(B0, B1);
|
||||
bgr1 = _mm_packus_epi16(B2, B3);
|
||||
bgr2 = _mm_packus_epi16(G0, G1);
|
||||
bgr3 = _mm_packus_epi16(G2, G3);
|
||||
bgr4 = _mm_packus_epi16(R0, R1);
|
||||
bgr5 = _mm_packus_epi16(R2, R3);
|
||||
|
||||
// Pack as BGRBGRBGRBGR.
|
||||
PlanarTo24b_SSE41(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
|
||||
|
||||
y += 32;
|
||||
u += 16;
|
||||
v += 16;
|
||||
}
|
||||
for (; n < len; ++n) { // Finish off
|
||||
VP8YuvToBgr(y[0], u[0], v[0], dst);
|
||||
dst += 3;
|
||||
y += 1;
|
||||
u += (n & 1);
|
||||
v += (n & 1);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void WebPInitSamplersSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE41(void) {
|
||||
WebPSamplers[MODE_RGB] = YuvToRgbRow_SSE41;
|
||||
WebPSamplers[MODE_BGR] = YuvToBgrRow_SSE41;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RGB24/32 -> YUV converters
|
||||
|
||||
// Load eight 16b-words from *src.
|
||||
#define LOAD_16(src) _mm_loadu_si128((const __m128i*)(src))
|
||||
// Store either 16b-words into *dst
|
||||
#define STORE_16(V, dst) _mm_storeu_si128((__m128i*)(dst), (V))
|
||||
|
||||
#define WEBP_SSE41_SHUFF(OUT) do { \
|
||||
const __m128i tmp0 = _mm_shuffle_epi8(A0, shuff0); \
|
||||
const __m128i tmp1 = _mm_shuffle_epi8(A1, shuff1); \
|
||||
const __m128i tmp2 = _mm_shuffle_epi8(A2, shuff2); \
|
||||
const __m128i tmp3 = _mm_shuffle_epi8(A3, shuff0); \
|
||||
const __m128i tmp4 = _mm_shuffle_epi8(A4, shuff1); \
|
||||
const __m128i tmp5 = _mm_shuffle_epi8(A5, shuff2); \
|
||||
\
|
||||
/* OR everything to get one channel */ \
|
||||
const __m128i tmp6 = _mm_or_si128(tmp0, tmp1); \
|
||||
const __m128i tmp7 = _mm_or_si128(tmp3, tmp4); \
|
||||
out[OUT + 0] = _mm_or_si128(tmp6, tmp2); \
|
||||
out[OUT + 1] = _mm_or_si128(tmp7, tmp5); \
|
||||
} while (0);
|
||||
|
||||
// Unpack the 8b input rgbrgbrgbrgb ... as contiguous registers:
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
const __m128i A2 = _mm_loadu_si128((const __m128i*)(rgb + 32));
|
||||
const __m128i A3 = _mm_loadu_si128((const __m128i*)(rgb + 48));
|
||||
const __m128i A4 = _mm_loadu_si128((const __m128i*)(rgb + 64));
|
||||
const __m128i A5 = _mm_loadu_si128((const __m128i*)(rgb + 80));
|
||||
|
||||
// Compute RR.
|
||||
{
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
WEBP_SSE41_SHUFF(0)
|
||||
}
|
||||
// Compute GG.
|
||||
{
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
WEBP_SSE41_SHUFF(2)
|
||||
}
|
||||
// Compute BB.
|
||||
{
|
||||
const __m128i shuff0 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
|
||||
const __m128i shuff1 = _mm_set_epi8(
|
||||
-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
|
||||
const __m128i shuff2 = _mm_set_epi8(
|
||||
15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
WEBP_SSE41_SHUFF(4)
|
||||
}
|
||||
}
|
||||
|
||||
#undef WEBP_SSE41_SHUFF
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE41(
|
||||
const uint32_t* const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
__m128i a2 = LOAD_16(argb + 8);
|
||||
__m128i a3 = LOAD_16(argb + 12);
|
||||
VP8L32bToPlanar_SSE41(&a0, &a1, &a2, &a3);
|
||||
rgb[0] = _mm_unpacklo_epi8(a1, zero);
|
||||
rgb[1] = _mm_unpackhi_epi8(a1, zero);
|
||||
rgb[2] = _mm_unpacklo_epi8(a2, zero);
|
||||
rgb[3] = _mm_unpackhi_epi8(a2, zero);
|
||||
rgb[4] = _mm_unpacklo_epi8(a3, zero);
|
||||
rgb[5] = _mm_unpackhi_epi8(a3, zero);
|
||||
}
|
||||
|
||||
// This macro computes (RG * MULT_RG + GB * MULT_GB + ROUNDER) >> DESCALE_FIX
|
||||
// It's a macro and not a function because we need to use immediate values with
|
||||
// srai_epi32, e.g.
|
||||
#define TRANSFORM(RG_LO, RG_HI, GB_LO, GB_HI, MULT_RG, MULT_GB, \
|
||||
ROUNDER, DESCALE_FIX, OUT) do { \
|
||||
const __m128i V0_lo = _mm_madd_epi16(RG_LO, MULT_RG); \
|
||||
const __m128i V0_hi = _mm_madd_epi16(RG_HI, MULT_RG); \
|
||||
const __m128i V1_lo = _mm_madd_epi16(GB_LO, MULT_GB); \
|
||||
const __m128i V1_hi = _mm_madd_epi16(GB_HI, MULT_GB); \
|
||||
const __m128i V2_lo = _mm_add_epi32(V0_lo, V1_lo); \
|
||||
const __m128i V2_hi = _mm_add_epi32(V0_hi, V1_hi); \
|
||||
const __m128i V3_lo = _mm_add_epi32(V2_lo, ROUNDER); \
|
||||
const __m128i V3_hi = _mm_add_epi32(V2_hi, ROUNDER); \
|
||||
const __m128i V5_lo = _mm_srai_epi32(V3_lo, DESCALE_FIX); \
|
||||
const __m128i V5_hi = _mm_srai_epi32(V3_hi, DESCALE_FIX); \
|
||||
(OUT) = _mm_packs_epi32(V5_lo, V5_hi); \
|
||||
} while (0)
|
||||
|
||||
#define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A))
|
||||
static WEBP_INLINE void ConvertRGBToY_SSE41(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const Y) {
|
||||
const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384);
|
||||
const __m128i kGB_y = MK_CST_16(16384, 6420);
|
||||
const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF);
|
||||
|
||||
const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G);
|
||||
const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
|
||||
const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B);
|
||||
const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
|
||||
TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ConvertRGBToUV_SSE41(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
__m128i* const U,
|
||||
__m128i* const V) {
|
||||
const __m128i kRG_u = MK_CST_16(-9719, -19081);
|
||||
const __m128i kGB_u = MK_CST_16(0, 28800);
|
||||
const __m128i kRG_v = MK_CST_16(28800, 0);
|
||||
const __m128i kGB_v = MK_CST_16(-24116, -4684);
|
||||
const __m128i kHALF_UV = _mm_set1_epi32(((128 << YUV_FIX) + YUV_HALF) << 2);
|
||||
|
||||
const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G);
|
||||
const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
|
||||
const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B);
|
||||
const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
|
||||
TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_u, kGB_u,
|
||||
kHALF_UV, YUV_FIX + 2, *U);
|
||||
TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_v, kGB_v,
|
||||
kHALF_UV, YUV_FIX + 2, *V);
|
||||
}
|
||||
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
__m128i rgb_plane[6];
|
||||
int j;
|
||||
|
||||
RGB24PackedToPlanar_SSE41(rgb, rgb_plane);
|
||||
|
||||
for (j = 0; j < 2; ++j, i += 16) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i r, g, b, Y0, Y1;
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
r = _mm_unpacklo_epi8(rgb_plane[0 + j], zero);
|
||||
g = _mm_unpacklo_epi8(rgb_plane[2 + j], zero);
|
||||
b = _mm_unpacklo_epi8(rgb_plane[4 + j], zero);
|
||||
ConvertRGBToY_SSE41(&r, &g, &b, &Y0);
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
r = _mm_unpackhi_epi8(rgb_plane[0 + j], zero);
|
||||
g = _mm_unpackhi_epi8(rgb_plane[2 + j], zero);
|
||||
b = _mm_unpackhi_epi8(rgb_plane[4 + j], zero);
|
||||
ConvertRGBToY_SSE41(&r, &g, &b, &Y1);
|
||||
|
||||
// Cast to 8-bit and store.
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
}
|
||||
}
|
||||
for (; i < width; ++i, rgb += 3) { // left-over
|
||||
y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
__m128i bgr_plane[6];
|
||||
int j;
|
||||
|
||||
RGB24PackedToPlanar_SSE41(bgr, bgr_plane);
|
||||
|
||||
for (j = 0; j < 2; ++j, i += 16) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i r, g, b, Y0, Y1;
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero);
|
||||
g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero);
|
||||
r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero);
|
||||
ConvertRGBToY_SSE41(&r, &g, &b, &Y0);
|
||||
|
||||
// Convert to 16-bit Y.
|
||||
b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero);
|
||||
g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero);
|
||||
r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero);
|
||||
ConvertRGBToY_SSE41(&r, &g, &b, &Y1);
|
||||
|
||||
// Cast to 8-bit and store.
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
}
|
||||
}
|
||||
for (; i < width; ++i, bgr += 3) { // left-over
|
||||
y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
__m128i Y0, Y1, rgb[6];
|
||||
RGB32PackedToPlanar_SSE41(&argb[i], rgb);
|
||||
ConvertRGBToY_SSE41(&rgb[0], &rgb[2], &rgb[4], &Y0);
|
||||
ConvertRGBToY_SSE41(&rgb[1], &rgb[3], &rgb[5], &Y1);
|
||||
STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
|
||||
}
|
||||
for (; i < width; ++i) { // left-over
|
||||
const uint32_t p = argb[i];
|
||||
y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff,
|
||||
YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
// Horizontal add (doubled) of two 16b values, result is 16b.
|
||||
// in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ...
|
||||
static void HorizontalAddPack_SSE41(const __m128i* const A,
|
||||
const __m128i* const B,
|
||||
__m128i* const out) {
|
||||
const __m128i k2 = _mm_set1_epi16(2);
|
||||
const __m128i C = _mm_madd_epi16(*A, k2);
|
||||
const __m128i D = _mm_madd_epi16(*B, k2);
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_SSE41(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 32, u += 16, v += 16) {
|
||||
__m128i rgb[6], U0, V0, U1, V1;
|
||||
RGB32PackedToPlanar_SSE41(&argb[i], rgb);
|
||||
HorizontalAddPack_SSE41(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack_SSE41(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack_SSE41(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV_SSE41(&rgb[0], &rgb[2], &rgb[4], &U0, &V0);
|
||||
|
||||
RGB32PackedToPlanar_SSE41(&argb[i + 16], rgb);
|
||||
HorizontalAddPack_SSE41(&rgb[0], &rgb[1], &rgb[0]);
|
||||
HorizontalAddPack_SSE41(&rgb[2], &rgb[3], &rgb[2]);
|
||||
HorizontalAddPack_SSE41(&rgb[4], &rgb[5], &rgb[4]);
|
||||
ConvertRGBToUV_SSE41(&rgb[0], &rgb[2], &rgb[4], &U1, &V1);
|
||||
|
||||
U0 = _mm_packus_epi16(U0, U1);
|
||||
V0 = _mm_packus_epi16(V0, V1);
|
||||
if (!do_store) {
|
||||
const __m128i prev_u = LOAD_16(u);
|
||||
const __m128i prev_v = LOAD_16(v);
|
||||
U0 = _mm_avg_epu8(U0, prev_u);
|
||||
V0 = _mm_avg_epu8(V0, prev_v);
|
||||
}
|
||||
STORE_16(U0, u);
|
||||
STORE_16(V0, v);
|
||||
}
|
||||
if (i < src_width) { // left-over
|
||||
WebPConvertARGBToUV_C(argb + i, u, v, src_width - i, do_store);
|
||||
}
|
||||
}
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
|
||||
const uint16_t* const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
const __m128i in2 = LOAD_16(rgbx + 16); // r4 | ...
|
||||
const __m128i in3 = LOAD_16(rgbx + 24); // r6 | ...
|
||||
// aarrggbb as 16-bit.
|
||||
const __m128i shuff0 =
|
||||
_mm_set_epi8(-1, -1, -1, -1, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0);
|
||||
const __m128i shuff1 =
|
||||
_mm_set_epi8(13, 12, 5, 4, -1, -1, -1, -1, 11, 10, 3, 2, 9, 8, 1, 0);
|
||||
const __m128i A0 = _mm_shuffle_epi8(in0, shuff0);
|
||||
const __m128i A1 = _mm_shuffle_epi8(in1, shuff1);
|
||||
const __m128i A2 = _mm_shuffle_epi8(in2, shuff0);
|
||||
const __m128i A3 = _mm_shuffle_epi8(in3, shuff1);
|
||||
// R0R1G0G1
|
||||
// B0B1****
|
||||
// R2R3G2G3
|
||||
// B2B3****
|
||||
// (OR is used to free port 5 for the unpack)
|
||||
const __m128i B0 = _mm_unpacklo_epi32(A0, A1);
|
||||
const __m128i B1 = _mm_or_si128(A0, A1);
|
||||
const __m128i B2 = _mm_unpacklo_epi32(A2, A3);
|
||||
const __m128i B3 = _mm_or_si128(A2, A3);
|
||||
// Gather the channels.
|
||||
*r = _mm_unpacklo_epi64(B0, B2);
|
||||
*g = _mm_unpackhi_epi64(B0, B2);
|
||||
*b = _mm_unpackhi_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
__m128i r, g, b, U0, V0, U1, V1;
|
||||
RGBA32PackedToPlanar_16b_SSE41(rgb + 0, &r, &g, &b);
|
||||
ConvertRGBToUV_SSE41(&r, &g, &b, &U0, &V0);
|
||||
RGBA32PackedToPlanar_16b_SSE41(rgb + 32, &r, &g, &b);
|
||||
ConvertRGBToUV_SSE41(&r, &g, &b, &U1, &V1);
|
||||
STORE_16(_mm_packus_epi16(U0, U1), u);
|
||||
STORE_16(_mm_packus_epi16(V0, V1), v);
|
||||
u += 16;
|
||||
v += 16;
|
||||
rgb += 2 * 32;
|
||||
}
|
||||
if (max_width < width) { // left-over
|
||||
WebPConvertRGBA32ToUV_C(rgb, u, v, width - max_width);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE41(void) {
|
||||
WebPConvertARGBToY = ConvertARGBToY_SSE41;
|
||||
WebPConvertARGBToUV = ConvertARGBToUV_SSE41;
|
||||
|
||||
WebPConvertRGB24ToY = ConvertRGB24ToY_SSE41;
|
||||
WebPConvertBGR24ToY = ConvertBGR24ToY_SSE41;
|
||||
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE41;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitSamplersSSE41)
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE41)
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
@@ -10,8 +10,8 @@
|
||||
// Author: Jyrki Alakuijala (jyrki@google.com)
|
||||
//
|
||||
|
||||
#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
|
||||
#define WEBP_ENC_BACKWARD_REFERENCES_H_
|
||||
#ifndef WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
|
||||
#define WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
@@ -91,11 +91,6 @@ static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
|
||||
return p->len;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
|
||||
assert(p->mode == kLiteral);
|
||||
return p->argb_or_distance;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
|
||||
assert(p->mode == kCacheIdx);
|
||||
assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
|
||||
@@ -113,6 +108,16 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
|
||||
#define HASH_BITS 18
|
||||
#define HASH_SIZE (1 << HASH_BITS)
|
||||
|
||||
// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it
|
||||
// is used in VP8LHashChain.
|
||||
#define MAX_LENGTH_BITS 12
|
||||
#define WINDOW_SIZE_BITS 20
|
||||
// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits.
|
||||
#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1)
|
||||
#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32
|
||||
#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32"
|
||||
#endif
|
||||
|
||||
typedef struct VP8LHashChain VP8LHashChain;
|
||||
struct VP8LHashChain {
|
||||
// The 20 most significant bits contain the offset at which the best match
|
||||
@@ -134,6 +139,24 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
int low_effort);
|
||||
void VP8LHashChainClear(VP8LHashChain* const p); // release memory
|
||||
|
||||
static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
|
||||
const int base_position) {
|
||||
return p->offset_length_[base_position] >> MAX_LENGTH_BITS;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8LHashChainFindLength(const VP8LHashChain* const p,
|
||||
const int base_position) {
|
||||
return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VP8LHashChainFindCopy(const VP8LHashChain* const p,
|
||||
int base_position,
|
||||
int* const offset_ptr,
|
||||
int* const length_ptr) {
|
||||
*offset_ptr = VP8LHashChainFindOffset(p, base_position);
|
||||
*length_ptr = VP8LHashChainFindLength(p, base_position);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// VP8LBackwardRefs (block-based backward-references storage)
|
||||
|
||||
@@ -158,9 +181,6 @@ struct VP8LBackwardRefs {
|
||||
void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size);
|
||||
// Release memory for backward references.
|
||||
void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs);
|
||||
// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error.
|
||||
int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
|
||||
VP8LBackwardRefs* const dst);
|
||||
|
||||
// Cursor for iterating on references content
|
||||
typedef struct {
|
||||
@@ -189,6 +209,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
|
||||
// -----------------------------------------------------------------------------
|
||||
// Main entry points
|
||||
|
||||
enum VP8LLZ77Type {
|
||||
kLZ77Standard = 1,
|
||||
kLZ77RLE = 2,
|
||||
kLZ77Box = 4
|
||||
};
|
||||
|
||||
// Evaluates best possible backward references for specified quality.
|
||||
// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache
|
||||
// bits to use (passing 0 implies disabling the local color cache).
|
||||
@@ -197,11 +223,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
|
||||
// refs[0] or refs[1].
|
||||
VP8LBackwardRefs* VP8LGetBackwardReferences(
|
||||
int width, int height, const uint32_t* const argb, int quality,
|
||||
int low_effort, int* const cache_bits,
|
||||
const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]);
|
||||
int low_effort, int lz77_types_to_try, int* const cache_bits,
|
||||
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1,
|
||||
VP8LBackwardRefs* const refs_tmp2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBP_ENC_BACKWARD_REFERENCES_H_
|
||||
#endif // WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
|
||||
|
||||
@@ -11,12 +11,12 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_ENC_COST_H_
|
||||
#define WEBP_ENC_COST_H_
|
||||
#ifndef WEBP_ENC_COST_ENC_H_
|
||||
#define WEBP_ENC_COST_ENC_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "./vp8i_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -79,4 +79,4 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_ENC_COST_H_ */
|
||||
#endif // WEBP_ENC_COST_ENC_H_
|
||||
|
||||
@@ -11,12 +11,12 @@
|
||||
//
|
||||
// Models the histograms of literal and distance codes.
|
||||
|
||||
#ifndef WEBP_ENC_HISTOGRAM_H_
|
||||
#define WEBP_ENC_HISTOGRAM_H_
|
||||
#ifndef WEBP_ENC_HISTOGRAM_ENC_H_
|
||||
#define WEBP_ENC_HISTOGRAM_ENC_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "./backward_references_enc.h"
|
||||
#include "../enc/backward_references_enc.h"
|
||||
#include "../webp/format_constants.h"
|
||||
#include "../webp/types.h"
|
||||
|
||||
@@ -44,6 +44,7 @@ typedef struct {
|
||||
double literal_cost_; // Cached values of dominant entropy costs:
|
||||
double red_cost_; // literal, red & blue.
|
||||
double blue_cost_;
|
||||
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
|
||||
} VP8LHistogram;
|
||||
|
||||
// Collection of histograms with fixed capacity, allocated as one
|
||||
@@ -67,7 +68,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
|
||||
int VP8LGetHistogramSize(int palette_code_bits);
|
||||
|
||||
// Set the palette_code_bits and reset the stats.
|
||||
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
|
||||
// If init_arrays is true, the arrays are also filled with 0's.
|
||||
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
|
||||
int init_arrays);
|
||||
|
||||
// Collect all the references into a histogram (without reset)
|
||||
void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
|
||||
@@ -83,6 +86,9 @@ void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
|
||||
// using 'cache_bits'. Return NULL in case of memory error.
|
||||
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
|
||||
|
||||
// Set the histograms in set to 0.
|
||||
void VP8LHistogramSetClear(VP8LHistogramSet* const set);
|
||||
|
||||
// Allocate and initialize histogram object with specified 'cache_bits'.
|
||||
// Returns NULL in case of memory error.
|
||||
// Special case of VP8LAllocateHistogramSet, with size equals 1.
|
||||
@@ -90,7 +96,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
|
||||
|
||||
// Accumulate a token 'v' into a histogram.
|
||||
void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
|
||||
const PixOrCopy* const v);
|
||||
const PixOrCopy* const v,
|
||||
int (*const distance_modifier)(int, int),
|
||||
int distance_modifier_arg0);
|
||||
|
||||
static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
|
||||
return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
|
||||
@@ -103,21 +111,18 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
int quality, int low_effort,
|
||||
int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_in,
|
||||
VP8LHistogramSet* const tmp_histos,
|
||||
VP8LHistogram* const tmp_histo,
|
||||
uint16_t* const histogram_symbols);
|
||||
|
||||
// Returns the entropy for the symbols in the input array.
|
||||
// Also sets trivial_symbol to the code value, if the array has only one code
|
||||
// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM.
|
||||
double VP8LBitsEntropy(const uint32_t* const array, int n,
|
||||
uint32_t* const trivial_symbol);
|
||||
double VP8LBitsEntropy(const uint32_t* const array, int n);
|
||||
|
||||
// Estimate how many bits the combined entropy of literals and distance
|
||||
// approximately maps to.
|
||||
double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
|
||||
double VP8LHistogramEstimateBits(VP8LHistogram* const p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBP_ENC_HISTOGRAM_H_
|
||||
#endif // WEBP_ENC_HISTOGRAM_ENC_H_
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_ENC_VP8ENCI_H_
|
||||
#define WEBP_ENC_VP8ENCI_H_
|
||||
#ifndef WEBP_ENC_VP8I_ENC_H_
|
||||
#define WEBP_ENC_VP8I_ENC_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "../dec/common_dec.h"
|
||||
@@ -30,9 +30,9 @@ extern "C" {
|
||||
// Various defines and enums
|
||||
|
||||
// version numbers
|
||||
#define ENC_MAJ_VERSION 0
|
||||
#define ENC_MIN_VERSION 6
|
||||
#define ENC_REV_VERSION 0
|
||||
#define ENC_MAJ_VERSION 1
|
||||
#define ENC_MIN_VERSION 0
|
||||
#define ENC_REV_VERSION 2
|
||||
|
||||
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
|
||||
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
|
||||
@@ -75,10 +75,10 @@ typedef enum { // Rate-distortion optimization levels
|
||||
#define U_OFF_ENC (16)
|
||||
#define V_OFF_ENC (16 + 8)
|
||||
|
||||
extern const int VP8Scan[16]; // in quant.c
|
||||
extern const int VP8UVModeOffsets[4]; // in analyze.c
|
||||
extern const int VP8I16ModeOffsets[4];
|
||||
extern const int VP8I4ModeOffsets[NUM_BMODES];
|
||||
extern const uint16_t VP8Scan[16];
|
||||
extern const uint16_t VP8UVModeOffsets[4];
|
||||
extern const uint16_t VP8I16ModeOffsets[4];
|
||||
extern const uint16_t VP8I4ModeOffsets[NUM_BMODES];
|
||||
|
||||
// Layout of prediction blocks
|
||||
// intra 16x16
|
||||
@@ -120,6 +120,9 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
|
||||
// Uncomment the following to remove token-buffer code:
|
||||
// #define DISABLE_TOKEN_BUFFER
|
||||
|
||||
// quality below which error-diffusion is enabled
|
||||
#define ERROR_DIFFUSION_QUALITY 98
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Headers
|
||||
|
||||
@@ -201,6 +204,8 @@ typedef struct {
|
||||
score_t i4_penalty_; // penalty for using Intra4
|
||||
} VP8SegmentInfo;
|
||||
|
||||
typedef int8_t DError[2 /* u/v */][2 /* top or left */];
|
||||
|
||||
// Handy transient struct to accumulate score and info during RD-optimization
|
||||
// and mode evaluation.
|
||||
typedef struct {
|
||||
@@ -213,6 +218,7 @@ typedef struct {
|
||||
uint8_t modes_i4[16]; // mode numbers for intra4 predictions
|
||||
int mode_uv; // mode number of chroma prediction
|
||||
uint32_t nz; // non-zero blocks
|
||||
int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3
|
||||
} VP8ModeScore;
|
||||
|
||||
// Iterator structure to iterate through macroblocks, pointing to the
|
||||
@@ -242,6 +248,9 @@ typedef struct {
|
||||
int count_down0_; // starting counter value (for progress)
|
||||
int percent0_; // saved initial progress percent
|
||||
|
||||
DError left_derr_; // left error diffusion (u/v)
|
||||
DError *top_derr_; // top diffusion error - NULL if disabled
|
||||
|
||||
uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
|
||||
uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
|
||||
uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
|
||||
@@ -269,7 +278,7 @@ int VP8IteratorIsDone(const VP8EncIterator* const it);
|
||||
// Import uncompressed samples from source.
|
||||
// If tmp_32 is not NULL, import boundary samples too.
|
||||
// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
|
||||
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
|
||||
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32);
|
||||
// export decimated samples
|
||||
void VP8IteratorExport(const VP8EncIterator* const it);
|
||||
// go to next macroblock. Returns false if not finished.
|
||||
@@ -330,9 +339,6 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,
|
||||
// Estimate the final coded size given a set of 'probas'.
|
||||
size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
|
||||
|
||||
// unused for now
|
||||
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
|
||||
|
||||
#endif // !DISABLE_TOKEN_BUFFER
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -404,6 +410,7 @@ struct VP8Encoder {
|
||||
uint8_t* uv_top_; // top u/v samples.
|
||||
// U and V are packed into 16 bytes (8 U + 8 V)
|
||||
LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off)
|
||||
DError* top_derr_; // diffusion error (NULL if disabled)
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -502,19 +509,10 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
|
||||
// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
|
||||
void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
|
||||
|
||||
// in near_lossless.c
|
||||
// Near lossless preprocessing in RGB color-space.
|
||||
int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
|
||||
// Near lossless adjustment for predictors.
|
||||
void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits,
|
||||
const uint32_t* argb_orig,
|
||||
uint32_t* argb, uint32_t* argb_scratch,
|
||||
const uint32_t* const transform_data,
|
||||
int quality, int subtract_green);
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_ENC_VP8ENCI_H_ */
|
||||
#endif // WEBP_ENC_VP8I_ENC_H_
|
||||
|
||||
@@ -11,11 +11,20 @@
|
||||
//
|
||||
// Author: Vikas Arora (vikaas.arora@gmail.com)
|
||||
|
||||
#ifndef WEBP_ENC_VP8LI_H_
|
||||
#define WEBP_ENC_VP8LI_H_
|
||||
#ifndef WEBP_ENC_VP8LI_ENC_H_
|
||||
#define WEBP_ENC_VP8LI_ENC_H_
|
||||
|
||||
#include "./backward_references_enc.h"
|
||||
#include "./histogram_enc.h"
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
#endif
|
||||
// Either WEBP_NEAR_LOSSLESS is defined as 0 in config.h when compiling to
|
||||
// disable near-lossless, or it is enabled by default.
|
||||
#ifndef WEBP_NEAR_LOSSLESS
|
||||
#define WEBP_NEAR_LOSSLESS 1
|
||||
#endif
|
||||
|
||||
#include "../enc/backward_references_enc.h"
|
||||
#include "../enc/histogram_enc.h"
|
||||
#include "../utils/bit_writer_utils.h"
|
||||
#include "../webp/encode.h"
|
||||
#include "../webp/format_constants.h"
|
||||
@@ -27,16 +36,24 @@ extern "C" {
|
||||
// maximum value of transform_bits_ in VP8LEncoder.
|
||||
#define MAX_TRANSFORM_BITS 6
|
||||
|
||||
typedef enum {
|
||||
kEncoderNone = 0,
|
||||
kEncoderARGB,
|
||||
kEncoderNearLossless,
|
||||
kEncoderPalette
|
||||
} VP8LEncoderARGBContent;
|
||||
|
||||
typedef struct {
|
||||
const WebPConfig* config_; // user configuration and parameters
|
||||
const WebPPicture* pic_; // input picture.
|
||||
|
||||
uint32_t* argb_; // Transformed argb image data.
|
||||
uint32_t* argb_scratch_; // Scratch memory for argb rows
|
||||
// (used for prediction).
|
||||
uint32_t* transform_data_; // Scratch memory for transform data.
|
||||
uint32_t* transform_mem_; // Currently allocated memory.
|
||||
size_t transform_mem_size_; // Currently allocated memory size.
|
||||
uint32_t* argb_; // Transformed argb image data.
|
||||
VP8LEncoderARGBContent argb_content_; // Content type of the argb buffer.
|
||||
uint32_t* argb_scratch_; // Scratch memory for argb rows
|
||||
// (used for prediction).
|
||||
uint32_t* transform_data_; // Scratch memory for transform data.
|
||||
uint32_t* transform_mem_; // Currently allocated memory.
|
||||
size_t transform_mem_size_; // Currently allocated memory size.
|
||||
|
||||
int current_width_; // Corresponds to packed image width.
|
||||
|
||||
@@ -54,8 +71,7 @@ typedef struct {
|
||||
uint32_t palette_[MAX_PALETTE_SIZE];
|
||||
|
||||
// Some 'scratch' (potentially large) objects.
|
||||
struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to
|
||||
// LZ77 & RLE coding.
|
||||
struct VP8LBackwardRefs refs_[3]; // Backward Refs array for temporaries.
|
||||
VP8LHashChain hash_chain_; // HashChain data for constructing
|
||||
// backward references.
|
||||
} VP8LEncoder;
|
||||
@@ -75,6 +91,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
|
||||
const WebPPicture* const picture,
|
||||
VP8LBitWriter* const bw, int use_cache);
|
||||
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
// in near_lossless.c
|
||||
// Near lossless preprocessing in RGB color-space.
|
||||
int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
|
||||
uint32_t* const argb_dst);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Image transforms in predictor.c.
|
||||
|
||||
@@ -92,4 +115,4 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_ENC_VP8LI_H_ */
|
||||
#endif // WEBP_ENC_VP8LI_ENC_H_
|
||||
|
||||
@@ -56,8 +56,10 @@ cp $1/src/dsp/rescaler_sse2.c dsp
|
||||
cp $1/src/dsp/upsampling.c dsp
|
||||
cp $1/src/dsp/upsampling_neon.c dsp
|
||||
cp $1/src/dsp/upsampling_sse2.c dsp
|
||||
cp $1/src/dsp/upsampling_sse41.c dsp
|
||||
cp $1/src/dsp/yuv.c dsp
|
||||
cp $1/src/dsp/yuv_sse2.c dsp
|
||||
cp $1/src/dsp/yuv_sse41.c dsp
|
||||
|
||||
mkdir -p enc
|
||||
cp $1/src/enc/*.h enc
|
||||
@@ -74,3 +76,5 @@ cp $1/src/utils/random_utils.c utils
|
||||
cp $1/src/utils/rescaler_utils.c utils
|
||||
cp $1/src/utils/thread_utils.c utils
|
||||
cp $1/src/utils/utils.c utils
|
||||
|
||||
find . \( -name "*.c" -o -name "*.h" \) -exec sed -i 's/#include "src\//#include "..\//g' {} \;
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_BIT_READER_INL_H_
|
||||
#define WEBP_UTILS_BIT_READER_INL_H_
|
||||
#ifndef WEBP_UTILS_BIT_READER_INL_UTILS_H_
|
||||
#define WEBP_UTILS_BIT_READER_INL_UTILS_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
@@ -23,9 +23,9 @@
|
||||
#include <string.h> // for memcpy
|
||||
|
||||
#include "../dsp/dsp.h"
|
||||
#include "./bit_reader_utils.h"
|
||||
#include "./endian_inl_utils.h"
|
||||
#include "./utils.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/endian_inl_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -187,4 +187,4 @@ static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_UTILS_BIT_READER_INL_H_
|
||||
#endif // WEBP_UTILS_BIT_READER_INL_UTILS_H_
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include "../webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "./bit_reader_inl_utils.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
// Vikas Arora (vikaas.arora@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_BIT_READER_H_
|
||||
#define WEBP_UTILS_BIT_READER_H_
|
||||
#ifndef WEBP_UTILS_BIT_READER_UTILS_H_
|
||||
#define WEBP_UTILS_BIT_READER_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#ifdef _MSC_VER
|
||||
@@ -155,9 +155,10 @@ static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
|
||||
|
||||
// For jumping over a number of bits in the bit stream when accessed with
|
||||
// VP8LPrefetchBits and VP8LFillBitWindow.
|
||||
// This function does *not* set br->eos_, since it's speed-critical.
|
||||
// Use with extreme care!
|
||||
static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
|
||||
br->bit_pos_ = val;
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
}
|
||||
|
||||
// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
|
||||
@@ -171,4 +172,4 @@ static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_BIT_READER_H_ */
|
||||
#endif // WEBP_UTILS_BIT_READER_UTILS_H_
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_BIT_WRITER_H_
|
||||
#define WEBP_UTILS_BIT_WRITER_H_
|
||||
#ifndef WEBP_UTILS_BIT_WRITER_UTILS_H_
|
||||
#define WEBP_UTILS_BIT_WRITER_UTILS_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
|
||||
@@ -100,16 +100,24 @@ typedef struct {
|
||||
int error_;
|
||||
} VP8LBitWriter;
|
||||
|
||||
static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
|
||||
static WEBP_INLINE size_t VP8LBitWriterNumBytes(const VP8LBitWriter* const bw) {
|
||||
return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
|
||||
}
|
||||
|
||||
// Returns false in case of memory allocation error.
|
||||
int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
|
||||
// Returns false in case of memory allocation error.
|
||||
int VP8LBitWriterClone(const VP8LBitWriter* const src,
|
||||
VP8LBitWriter* const dst);
|
||||
// Finalize the bitstream coding. Returns a pointer to the internal buffer.
|
||||
uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
|
||||
// Release any pending memory and zeroes the object.
|
||||
void VP8LBitWriterWipeOut(VP8LBitWriter* const bw);
|
||||
// Resets the cursor of the BitWriter bw to when it was like in bw_init.
|
||||
void VP8LBitWriterReset(const VP8LBitWriter* const bw_init,
|
||||
VP8LBitWriter* const bw);
|
||||
// Swaps the memory held by two BitWriters.
|
||||
void VP8LBitWriterSwap(VP8LBitWriter* const src, VP8LBitWriter* const dst);
|
||||
|
||||
// Internal function for VP8LPutBits flushing 32 bits from the written state.
|
||||
void VP8LPutBitsFlushBits(VP8LBitWriter* const bw);
|
||||
@@ -143,4 +151,4 @@ static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_BIT_WRITER_H_ */
|
||||
#endif // WEBP_UTILS_BIT_WRITER_UTILS_H_
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "./color_cache_utils.h"
|
||||
#include "./utils.h"
|
||||
#include "../utils/color_cache_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8LColorCache.
|
||||
|
||||
@@ -12,8 +12,10 @@
|
||||
// Authors: Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_UTILS_COLOR_CACHE_H_
|
||||
#define WEBP_UTILS_COLOR_CACHE_H_
|
||||
#ifndef WEBP_UTILS_COLOR_CACHE_UTILS_H_
|
||||
#define WEBP_UTILS_COLOR_CACHE_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "../webp/types.h"
|
||||
|
||||
@@ -30,7 +32,7 @@ typedef struct {
|
||||
|
||||
static const uint64_t kHashMul = 0x1e35a7bdull;
|
||||
|
||||
static WEBP_INLINE int HashPix(uint32_t argb, int shift) {
|
||||
static WEBP_INLINE int VP8LHashPix(uint32_t argb, int shift) {
|
||||
return (int)(((argb * kHashMul) & 0xffffffffu) >> shift);
|
||||
}
|
||||
|
||||
@@ -48,19 +50,19 @@ static WEBP_INLINE void VP8LColorCacheSet(const VP8LColorCache* const cc,
|
||||
|
||||
static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
|
||||
uint32_t argb) {
|
||||
const int key = HashPix(argb, cc->hash_shift_);
|
||||
const int key = VP8LHashPix(argb, cc->hash_shift_);
|
||||
cc->colors_[key] = argb;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
|
||||
uint32_t argb) {
|
||||
return HashPix(argb, cc->hash_shift_);
|
||||
return VP8LHashPix(argb, cc->hash_shift_);
|
||||
}
|
||||
|
||||
// Return the key if cc contains argb, and -1 otherwise.
|
||||
static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
|
||||
uint32_t argb) {
|
||||
const int key = HashPix(argb, cc->hash_shift_);
|
||||
const int key = VP8LHashPix(argb, cc->hash_shift_);
|
||||
return (cc->colors_[key] == argb) ? key : -1;
|
||||
}
|
||||
|
||||
@@ -82,4 +84,4 @@ void VP8LColorCacheClear(VP8LColorCache* const color_cache);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBP_UTILS_COLOR_CACHE_H_
|
||||
#endif // WEBP_UTILS_COLOR_CACHE_UTILS_H_
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
//
|
||||
// Endian related functions.
|
||||
|
||||
#ifndef WEBP_UTILS_ENDIAN_INL_H_
|
||||
#define WEBP_UTILS_ENDIAN_INL_H_
|
||||
#ifndef WEBP_UTILS_ENDIAN_INL_UTILS_H_
|
||||
#define WEBP_UTILS_ENDIAN_INL_UTILS_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
@@ -19,13 +19,6 @@
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../webp/types.h"
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
#define HToLE32 BSwap32
|
||||
#define HToLE16 BSwap16
|
||||
@@ -97,4 +90,4 @@ static WEBP_INLINE uint64_t BSwap64(uint64_t x) {
|
||||
#endif // HAVE_BUILTIN_BSWAP64
|
||||
}
|
||||
|
||||
#endif // WEBP_UTILS_ENDIAN_INL_H_
|
||||
#endif // WEBP_UTILS_ENDIAN_INL_UTILS_H_
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#include "./filters_utils.h"
|
||||
#include "../utils/filters_utils.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_UTILS_FILTERS_H_
|
||||
#define WEBP_UTILS_FILTERS_H_
|
||||
#ifndef WEBP_UTILS_FILTERS_UTILS_H_
|
||||
#define WEBP_UTILS_FILTERS_UTILS_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
#include "../dsp/dsp.h"
|
||||
@@ -29,4 +29,4 @@ WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_FILTERS_H_ */
|
||||
#endif // WEBP_UTILS_FILTERS_UTILS_H_
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Entropy encoding (Huffman) for webp lossless
|
||||
|
||||
#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
|
||||
#define WEBP_UTILS_HUFFMAN_ENCODE_H_
|
||||
#ifndef WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
|
||||
#define WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
|
||||
@@ -57,4 +57,4 @@ void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBP_UTILS_HUFFMAN_ENCODE_H_
|
||||
#endif // WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "./huffman_utils.h"
|
||||
#include "./utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/format_constants.h"
|
||||
|
||||
// Huffman data read via DecodeImageStream is represented in two (red and green)
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Urvang Joshi (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_UTILS_HUFFMAN_H_
|
||||
#define WEBP_UTILS_HUFFMAN_H_
|
||||
#ifndef WEBP_UTILS_HUFFMAN_UTILS_H_
|
||||
#define WEBP_UTILS_HUFFMAN_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include "../webp/format_constants.h"
|
||||
@@ -85,4 +85,4 @@ int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_UTILS_HUFFMAN_H_
|
||||
#endif // WEBP_UTILS_HUFFMAN_UTILS_H_
|
||||
|
||||
@@ -14,11 +14,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./quant_levels_dec_utils.h"
|
||||
#include "../utils/quant_levels_dec_utils.h"
|
||||
|
||||
#include <string.h> // for memset
|
||||
|
||||
#include "./utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
// #define USE_DITHERING // uncomment to enable ordered dithering (not vital)
|
||||
|
||||
@@ -71,10 +71,11 @@ typedef struct {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define CLIP_MASK (int)(~0U << (8 + DFIX))
|
||||
#define CLIP_8b_MASK (int)(~0U << (8 + DFIX))
|
||||
static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
|
||||
return (!(v & CLIP_8b_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
|
||||
}
|
||||
#undef CLIP_8b_MASK
|
||||
|
||||
// vertical accumulation
|
||||
static void VFilter(SmoothParams* const p) {
|
||||
@@ -260,9 +261,15 @@ static void CleanupParams(SmoothParams* const p) {
|
||||
|
||||
int WebPDequantizeLevels(uint8_t* const data, int width, int height, int stride,
|
||||
int strength) {
|
||||
const int radius = 4 * strength / 100;
|
||||
int radius = 4 * strength / 100;
|
||||
|
||||
if (strength < 0 || strength > 100) return 0;
|
||||
if (data == NULL || width <= 0 || height <= 0) return 0; // bad params
|
||||
|
||||
// limit the filter size to not exceed the image dimensions
|
||||
if (2 * radius + 1 > width) radius = (width - 1) >> 1;
|
||||
if (2 * radius + 1 > height) radius = (height - 1) >> 1;
|
||||
|
||||
if (radius > 0) {
|
||||
SmoothParams p;
|
||||
memset(&p, 0, sizeof(p));
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Vikas Arora (vikasa@google.com)
|
||||
|
||||
#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
|
||||
#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
|
||||
#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_
|
||||
#define WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
|
||||
@@ -32,4 +32,4 @@ int WebPDequantizeLevels(uint8_t* const data, int width, int height, int stride,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
|
||||
#endif // WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./quant_levels_utils.h"
|
||||
#include "../utils/quant_levels_utils.h"
|
||||
|
||||
#define NUM_SYMBOLS 256
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Vikas Arora (vikasa@google.com)
|
||||
|
||||
#ifndef WEBP_UTILS_QUANT_LEVELS_H_
|
||||
#define WEBP_UTILS_QUANT_LEVELS_H_
|
||||
#ifndef WEBP_UTILS_QUANT_LEVELS_UTILS_H_
|
||||
#define WEBP_UTILS_QUANT_LEVELS_UTILS_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -33,4 +33,4 @@ int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_QUANT_LEVELS_H_ */
|
||||
#endif // WEBP_UTILS_QUANT_LEVELS_UTILS_H_
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <string.h>
|
||||
#include "./random_utils.h"
|
||||
#include "../utils/random_utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_RANDOM_H_
|
||||
#define WEBP_UTILS_RANDOM_H_
|
||||
#ifndef WEBP_UTILS_RANDOM_UTILS_H_
|
||||
#define WEBP_UTILS_RANDOM_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include "../webp/types.h"
|
||||
@@ -60,4 +60,4 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_RANDOM_H_ */
|
||||
#endif // WEBP_UTILS_RANDOM_UTILS_H_
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../dsp/dsp.h"
|
||||
#include "./rescaler_utils.h"
|
||||
#include "../utils/rescaler_utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -85,11 +85,13 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
|
||||
|
||||
// if width is unspecified, scale original proportionally to height ratio.
|
||||
if (width == 0) {
|
||||
width = (src_width * height + src_height / 2) / src_height;
|
||||
width =
|
||||
(int)(((uint64_t)src_width * height + src_height / 2) / src_height);
|
||||
}
|
||||
// if height is unspecified, scale original proportionally to width ratio.
|
||||
if (height == 0) {
|
||||
height = (src_height * width + src_width / 2) / src_width;
|
||||
height =
|
||||
(int)(((uint64_t)src_height * width + src_width / 2) / src_width);
|
||||
}
|
||||
// Check if the overall dimensions still make sense.
|
||||
if (width <= 0 || height <= 0) {
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_RESCALER_H_
|
||||
#define WEBP_UTILS_RESCALER_H_
|
||||
#ifndef WEBP_UTILS_RESCALER_UTILS_H_
|
||||
#define WEBP_UTILS_RESCALER_UTILS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -98,4 +98,4 @@ int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_RESCALER_H_ */
|
||||
#endif // WEBP_UTILS_RESCALER_UTILS_H_
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> // for memset()
|
||||
#include "./thread_utils.h"
|
||||
#include "./utils.h"
|
||||
#include "../utils/thread_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
#ifdef WEBP_USE_THREAD
|
||||
|
||||
@@ -50,11 +50,11 @@ typedef struct {
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
struct WebPWorkerImpl {
|
||||
typedef struct {
|
||||
pthread_mutex_t mutex_;
|
||||
pthread_cond_t condition_;
|
||||
pthread_t thread_;
|
||||
};
|
||||
} WebPWorkerImpl;
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
@@ -201,25 +201,24 @@ static int pthread_cond_wait(pthread_cond_t* const condition,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Execute(WebPWorker* const worker); // Forward declaration.
|
||||
|
||||
static THREADFN ThreadLoop(void* ptr) {
|
||||
WebPWorker* const worker = (WebPWorker*)ptr;
|
||||
WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
|
||||
int done = 0;
|
||||
while (!done) {
|
||||
pthread_mutex_lock(&worker->impl_->mutex_);
|
||||
pthread_mutex_lock(&impl->mutex_);
|
||||
while (worker->status_ == OK) { // wait in idling mode
|
||||
pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
|
||||
pthread_cond_wait(&impl->condition_, &impl->mutex_);
|
||||
}
|
||||
if (worker->status_ == WORK) {
|
||||
Execute(worker);
|
||||
WebPGetWorkerInterface()->Execute(worker);
|
||||
worker->status_ = OK;
|
||||
} else if (worker->status_ == NOT_OK) { // finish the worker
|
||||
done = 1;
|
||||
}
|
||||
// signal to the main thread that we're done (for Sync())
|
||||
pthread_cond_signal(&worker->impl_->condition_);
|
||||
pthread_mutex_unlock(&worker->impl_->mutex_);
|
||||
pthread_cond_signal(&impl->condition_);
|
||||
pthread_mutex_unlock(&impl->mutex_);
|
||||
}
|
||||
return THREAD_RETURN(NULL); // Thread is finished
|
||||
}
|
||||
@@ -229,21 +228,22 @@ static void ChangeState(WebPWorker* const worker, WebPWorkerStatus new_status) {
|
||||
// No-op when attempting to change state on a thread that didn't come up.
|
||||
// Checking status_ without acquiring the lock first would result in a data
|
||||
// race.
|
||||
if (worker->impl_ == NULL) return;
|
||||
WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
|
||||
if (impl == NULL) return;
|
||||
|
||||
pthread_mutex_lock(&worker->impl_->mutex_);
|
||||
pthread_mutex_lock(&impl->mutex_);
|
||||
if (worker->status_ >= OK) {
|
||||
// wait for the worker to finish
|
||||
while (worker->status_ != OK) {
|
||||
pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
|
||||
pthread_cond_wait(&impl->condition_, &impl->mutex_);
|
||||
}
|
||||
// assign new status and release the working thread if needed
|
||||
if (new_status != OK) {
|
||||
worker->status_ = new_status;
|
||||
pthread_cond_signal(&worker->impl_->condition_);
|
||||
pthread_cond_signal(&impl->condition_);
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&worker->impl_->mutex_);
|
||||
pthread_mutex_unlock(&impl->mutex_);
|
||||
}
|
||||
|
||||
#endif // WEBP_USE_THREAD
|
||||
@@ -268,26 +268,28 @@ static int Reset(WebPWorker* const worker) {
|
||||
worker->had_error = 0;
|
||||
if (worker->status_ < OK) {
|
||||
#ifdef WEBP_USE_THREAD
|
||||
worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
|
||||
WebPWorkerImpl* const impl =
|
||||
(WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(WebPWorkerImpl));
|
||||
worker->impl_ = (void*)impl;
|
||||
if (worker->impl_ == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
|
||||
if (pthread_mutex_init(&impl->mutex_, NULL)) {
|
||||
goto Error;
|
||||
}
|
||||
if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
|
||||
pthread_mutex_destroy(&worker->impl_->mutex_);
|
||||
if (pthread_cond_init(&impl->condition_, NULL)) {
|
||||
pthread_mutex_destroy(&impl->mutex_);
|
||||
goto Error;
|
||||
}
|
||||
pthread_mutex_lock(&worker->impl_->mutex_);
|
||||
ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
|
||||
pthread_mutex_lock(&impl->mutex_);
|
||||
ok = !pthread_create(&impl->thread_, NULL, ThreadLoop, worker);
|
||||
if (ok) worker->status_ = OK;
|
||||
pthread_mutex_unlock(&worker->impl_->mutex_);
|
||||
pthread_mutex_unlock(&impl->mutex_);
|
||||
if (!ok) {
|
||||
pthread_mutex_destroy(&worker->impl_->mutex_);
|
||||
pthread_cond_destroy(&worker->impl_->condition_);
|
||||
pthread_mutex_destroy(&impl->mutex_);
|
||||
pthread_cond_destroy(&impl->condition_);
|
||||
Error:
|
||||
WebPSafeFree(worker->impl_);
|
||||
WebPSafeFree(impl);
|
||||
worker->impl_ = NULL;
|
||||
return 0;
|
||||
}
|
||||
@@ -318,11 +320,12 @@ static void Launch(WebPWorker* const worker) {
|
||||
static void End(WebPWorker* const worker) {
|
||||
#ifdef WEBP_USE_THREAD
|
||||
if (worker->impl_ != NULL) {
|
||||
WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
|
||||
ChangeState(worker, NOT_OK);
|
||||
pthread_join(worker->impl_->thread_, NULL);
|
||||
pthread_mutex_destroy(&worker->impl_->mutex_);
|
||||
pthread_cond_destroy(&worker->impl_->condition_);
|
||||
WebPSafeFree(worker->impl_);
|
||||
pthread_join(impl->thread_, NULL);
|
||||
pthread_mutex_destroy(&impl->mutex_);
|
||||
pthread_cond_destroy(&impl->condition_);
|
||||
WebPSafeFree(impl);
|
||||
worker->impl_ = NULL;
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_UTILS_THREAD_H_
|
||||
#define WEBP_UTILS_THREAD_H_
|
||||
#ifndef WEBP_UTILS_THREAD_UTILS_H_
|
||||
#define WEBP_UTILS_THREAD_UTILS_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
@@ -35,12 +35,9 @@ typedef enum {
|
||||
// arguments (data1 and data2), and should return false in case of error.
|
||||
typedef int (*WebPWorkerHook)(void*, void*);
|
||||
|
||||
// Platform-dependent implementation details for the worker.
|
||||
typedef struct WebPWorkerImpl WebPWorkerImpl;
|
||||
|
||||
// Synchronization object used to launch job in the worker thread
|
||||
typedef struct {
|
||||
WebPWorkerImpl* impl_;
|
||||
void* impl_; // platform-dependent implementation worker details
|
||||
WebPWorkerStatus status_;
|
||||
WebPWorkerHook hook; // hook to call
|
||||
void* data1; // first argument passed to 'hook'
|
||||
@@ -78,11 +75,11 @@ typedef struct {
|
||||
// decoding takes place. The contents of the interface struct are copied, it
|
||||
// is safe to free the corresponding memory after this call. This function is
|
||||
// not thread-safe. Return false in case of invalid pointer or methods.
|
||||
WEBP_EXTERN(int) WebPSetWorkerInterface(
|
||||
WEBP_EXTERN int WebPSetWorkerInterface(
|
||||
const WebPWorkerInterface* const winterface);
|
||||
|
||||
// Retrieve the currently set thread worker interface.
|
||||
WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
|
||||
WEBP_EXTERN const WebPWorkerInterface* WebPGetWorkerInterface(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -90,4 +87,4 @@ WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_THREAD_H_ */
|
||||
#endif // WEBP_UTILS_THREAD_UTILS_H_
|
||||
|
||||
@@ -16,7 +16,8 @@
|
||||
#include "../webp/decode.h"
|
||||
#include "../webp/encode.h"
|
||||
#include "../webp/format_constants.h" // for MAX_PALETTE_SIZE
|
||||
#include "./utils.h"
|
||||
#include "../utils/color_cache_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
|
||||
// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
|
||||
@@ -252,7 +253,6 @@ int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
|
||||
int num_colors = 0;
|
||||
uint8_t in_use[COLOR_HASH_SIZE] = { 0 };
|
||||
uint32_t colors[COLOR_HASH_SIZE];
|
||||
static const uint64_t kHashMul = 0x1e35a7bdull;
|
||||
const uint32_t* argb = pic->argb;
|
||||
const int width = pic->width;
|
||||
const int height = pic->height;
|
||||
@@ -267,7 +267,7 @@ int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
|
||||
continue;
|
||||
}
|
||||
last_pix = argb[x];
|
||||
key = ((last_pix * kHashMul) & 0xffffffffu) >> COLOR_HASH_RIGHT_SHIFT;
|
||||
key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT);
|
||||
while (1) {
|
||||
if (!in_use[key]) {
|
||||
colors[key] = last_pix;
|
||||
|
||||
+25
-25
@@ -48,13 +48,13 @@ extern "C" {
|
||||
// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
|
||||
// safe malloc() borrows the signature from calloc(), pointing at the dangerous
|
||||
// underlying multiply involved.
|
||||
WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
|
||||
WEBP_EXTERN void* WebPSafeMalloc(uint64_t nmemb, size_t size);
|
||||
// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
|
||||
// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
|
||||
WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
|
||||
WEBP_EXTERN void* WebPSafeCalloc(uint64_t nmemb, size_t size);
|
||||
|
||||
// Companion deallocation function to the above allocations.
|
||||
WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
|
||||
WEBP_EXTERN void WebPSafeFree(void* const ptr);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Alignment
|
||||
@@ -66,7 +66,7 @@ WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
|
||||
// memcpy() is the safe way of moving potentially unaligned 32b memory.
|
||||
static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
|
||||
uint32_t A;
|
||||
memcpy(&A, (const int*)ptr, sizeof(A));
|
||||
memcpy(&A, ptr, sizeof(A));
|
||||
return A;
|
||||
}
|
||||
static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
|
||||
@@ -107,19 +107,6 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
|
||||
PutLE16(data + 2, (int)(val >> 16));
|
||||
}
|
||||
|
||||
// Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either
|
||||
// based on table or not. Can be used as fallback if clz() is not available.
|
||||
#define WEBP_NEED_LOG_TABLE_8BIT
|
||||
extern const uint8_t WebPLogTable8bit[256];
|
||||
static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
|
||||
int log = 0;
|
||||
while (n >= 256) {
|
||||
log += 8;
|
||||
n >>= 8;
|
||||
}
|
||||
return log + WebPLogTable8bit[n];
|
||||
}
|
||||
|
||||
// Returns (int)floor(log2(n)). n must be > 0.
|
||||
// use GNU builtins where available.
|
||||
#if defined(__GNUC__) && \
|
||||
@@ -138,6 +125,19 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
|
||||
return first_set_bit;
|
||||
}
|
||||
#else // default: use the C-version.
|
||||
// Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either
|
||||
// based on table or not. Can be used as fallback if clz() is not available.
|
||||
#define WEBP_NEED_LOG_TABLE_8BIT
|
||||
extern const uint8_t WebPLogTable8bit[256];
|
||||
static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
|
||||
int log_value = 0;
|
||||
while (n >= 256) {
|
||||
log_value += 8;
|
||||
n >>= 8;
|
||||
}
|
||||
return log_value + WebPLogTable8bit[n];
|
||||
}
|
||||
|
||||
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
|
||||
#endif
|
||||
|
||||
@@ -147,14 +147,14 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
|
||||
struct WebPPicture;
|
||||
|
||||
// Copy width x height pixels from 'src' to 'dst' honoring the strides.
|
||||
WEBP_EXTERN(void) WebPCopyPlane(const uint8_t* src, int src_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
int width, int height);
|
||||
WEBP_EXTERN void WebPCopyPlane(const uint8_t* src, int src_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
int width, int height);
|
||||
|
||||
// Copy ARGB pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are
|
||||
// assumed to be already allocated and using ARGB data.
|
||||
WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src,
|
||||
struct WebPPicture* const dst);
|
||||
WEBP_EXTERN void WebPCopyPixels(const struct WebPPicture* const src,
|
||||
struct WebPPicture* const dst);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Unique colors.
|
||||
@@ -166,8 +166,8 @@ WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src,
|
||||
// MAX_PALETTE_SIZE, also outputs the actual unique colors into 'palette'.
|
||||
// Note: 'palette' is assumed to be an array already allocated with at least
|
||||
// MAX_PALETTE_SIZE elements.
|
||||
WEBP_EXTERN(int) WebPGetColorPalette(const struct WebPPicture* const pic,
|
||||
uint32_t* const palette);
|
||||
WEBP_EXTERN int WebPGetColorPalette(const struct WebPPicture* const pic,
|
||||
uint32_t* const palette);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -175,4 +175,4 @@ WEBP_EXTERN(int) WebPGetColorPalette(const struct WebPPicture* const pic,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_UTILS_UTILS_H_ */
|
||||
#endif // WEBP_UTILS_UTILS_H_
|
||||
|
||||
+57
-44
@@ -36,39 +36,45 @@ typedef struct WebPDecoderConfig WebPDecoderConfig;
|
||||
|
||||
// Return the decoder's version number, packed in hexadecimal using 8bits for
|
||||
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetDecoderVersion(void);
|
||||
WEBP_EXTERN int WebPGetDecoderVersion(void);
|
||||
|
||||
// Retrieve basic header information: width, height.
|
||||
// This function will also validate the header, returning true on success,
|
||||
// false otherwise. '*width' and '*height' are only valid on successful return.
|
||||
// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
|
||||
WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
// Note: The following chunk sequences (before the raw VP8/VP8L data) are
|
||||
// considered valid by this function:
|
||||
// RIFF + VP8(L)
|
||||
// RIFF + VP8X + (optional chunks) + VP8(L)
|
||||
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
|
||||
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
|
||||
WEBP_EXTERN int WebPGetInfo(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
// Decodes WebP images pointed to by 'data' and returns RGBA samples, along
|
||||
// with the dimensions in *width and *height. The ordering of samples in
|
||||
// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
|
||||
// The returned pointer should be deleted calling WebPFree().
|
||||
// Returns NULL in case of error.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
|
||||
// If the bitstream contains transparency, it is ignored.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height);
|
||||
|
||||
|
||||
// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
|
||||
@@ -80,13 +86,13 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
|
||||
// have a common stride returned as '*uv_stride'.
|
||||
// Return NULL in case of error.
|
||||
// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height,
|
||||
uint8_t** u, uint8_t** v,
|
||||
int* stride, int* uv_stride);
|
||||
WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height,
|
||||
uint8_t** u, uint8_t** v,
|
||||
int* stride, int* uv_stride);
|
||||
|
||||
// Releases memory returned by the WebPDecode*() functions above.
|
||||
WEBP_EXTERN(void) WebPFree(void* ptr);
|
||||
WEBP_EXTERN void WebPFree(void* ptr);
|
||||
|
||||
// These five functions are variants of the above ones, that decode the image
|
||||
// directly into a pre-allocated buffer 'output_buffer'. The maximum storage
|
||||
@@ -96,22 +102,22 @@ WEBP_EXTERN(void) WebPFree(void* ptr);
|
||||
// The parameter 'output_stride' specifies the distance (in bytes)
|
||||
// between scanlines. Hence, output_buffer_size is expected to be at least
|
||||
// output_stride x picture-height.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeRGBAInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeARGBInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeBGRAInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
|
||||
// RGB and BGR variants. Here too the transparency information, if present,
|
||||
// will be dropped and ignored.
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeRGBInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeBGRInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
|
||||
@@ -122,7 +128,7 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
|
||||
// 'u_size' and 'v_size' respectively.
|
||||
// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
|
||||
// during decoding (or because some buffers were found to be too small).
|
||||
WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
|
||||
WEBP_EXTERN uint8_t* WebPDecodeYUVInto(
|
||||
const uint8_t* data, size_t data_size,
|
||||
uint8_t* luma, size_t luma_size, int luma_stride,
|
||||
uint8_t* u, size_t u_size, int u_stride,
|
||||
@@ -213,7 +219,7 @@ struct WebPDecBuffer {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
|
||||
WEBP_EXTERN int WebPInitDecBufferInternal(WebPDecBuffer*, int);
|
||||
|
||||
// Initialize the structure as empty. Must be called before any other use.
|
||||
// Returns false in case of version mismatch
|
||||
@@ -223,7 +229,7 @@ static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
|
||||
|
||||
// Free any memory associated with the buffer. Must always be called last.
|
||||
// Note: doesn't free the 'buffer' structure itself.
|
||||
WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
|
||||
WEBP_EXTERN void WebPFreeDecBuffer(WebPDecBuffer* buffer);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Enumeration of the status codes
|
||||
@@ -277,7 +283,7 @@ typedef enum VP8StatusCode {
|
||||
// within valid bounds.
|
||||
// All other fields of WebPDecBuffer MUST remain constant between calls.
|
||||
// Returns NULL if the allocation failed.
|
||||
WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
|
||||
WEBP_EXTERN WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer);
|
||||
|
||||
// This function allocates and initializes an incremental-decoder object, which
|
||||
// will output the RGB/A samples specified by 'csp' into a preallocated
|
||||
@@ -289,7 +295,7 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
|
||||
// colorspace 'csp' is taken into account for allocating this buffer. All other
|
||||
// parameters are ignored.
|
||||
// Returns NULL if the allocation failed, or if some parameters are invalid.
|
||||
WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
|
||||
WEBP_EXTERN WebPIDecoder* WebPINewRGB(
|
||||
WEBP_CSP_MODE csp,
|
||||
uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
|
||||
|
||||
@@ -304,7 +310,7 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
|
||||
// In this case, the output buffer will be automatically allocated (using
|
||||
// MODE_YUVA) when decoding starts. All parameters are then ignored.
|
||||
// Returns NULL if the allocation failed or if a parameter is invalid.
|
||||
WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
|
||||
WEBP_EXTERN WebPIDecoder* WebPINewYUVA(
|
||||
uint8_t* luma, size_t luma_size, int luma_stride,
|
||||
uint8_t* u, size_t u_size, int u_stride,
|
||||
uint8_t* v, size_t v_size, int v_stride,
|
||||
@@ -312,19 +318,19 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
|
||||
|
||||
// Deprecated version of the above, without the alpha plane.
|
||||
// Kept for backward compatibility.
|
||||
WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
|
||||
WEBP_EXTERN WebPIDecoder* WebPINewYUV(
|
||||
uint8_t* luma, size_t luma_size, int luma_stride,
|
||||
uint8_t* u, size_t u_size, int u_stride,
|
||||
uint8_t* v, size_t v_size, int v_stride);
|
||||
|
||||
// Deletes the WebPIDecoder object and associated memory. Must always be called
|
||||
// if WebPINewDecoder, WebPINewRGB or WebPINewYUV succeeded.
|
||||
WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
|
||||
WEBP_EXTERN void WebPIDelete(WebPIDecoder* idec);
|
||||
|
||||
// Copies and decodes the next available data. Returns VP8_STATUS_OK when
|
||||
// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
|
||||
// data is expected. Returns error in other cases.
|
||||
WEBP_EXTERN(VP8StatusCode) WebPIAppend(
|
||||
WEBP_EXTERN VP8StatusCode WebPIAppend(
|
||||
WebPIDecoder* idec, const uint8_t* data, size_t data_size);
|
||||
|
||||
// A variant of the above function to be used when data buffer contains
|
||||
@@ -332,7 +338,7 @@ WEBP_EXTERN(VP8StatusCode) WebPIAppend(
|
||||
// to the internal memory.
|
||||
// Note that the value of the 'data' pointer can change between calls to
|
||||
// WebPIUpdate, for instance when the data buffer is resized to fit larger data.
|
||||
WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
|
||||
WEBP_EXTERN VP8StatusCode WebPIUpdate(
|
||||
WebPIDecoder* idec, const uint8_t* data, size_t data_size);
|
||||
|
||||
// Returns the RGB/A image decoded so far. Returns NULL if output params
|
||||
@@ -340,15 +346,16 @@ WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
|
||||
// specified during call to WebPINewDecoder() or WebPINewRGB().
|
||||
// *last_y is the index of last decoded row in raster scan order. Some pointers
|
||||
// (*last_y, *width etc.) can be NULL if corresponding information is not
|
||||
// needed.
|
||||
WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
|
||||
// needed. The values in these pointers are only valid on successful (non-NULL)
|
||||
// return.
|
||||
WEBP_EXTERN uint8_t* WebPIDecGetRGB(
|
||||
const WebPIDecoder* idec, int* last_y,
|
||||
int* width, int* height, int* stride);
|
||||
|
||||
// Same as above function to get a YUVA image. Returns pointer to the luma
|
||||
// plane or NULL in case of error. If there is no alpha information
|
||||
// the alpha pointer '*a' will be returned NULL.
|
||||
WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
|
||||
WEBP_EXTERN uint8_t* WebPIDecGetYUVA(
|
||||
const WebPIDecoder* idec, int* last_y,
|
||||
uint8_t** u, uint8_t** v, uint8_t** a,
|
||||
int* width, int* height, int* stride, int* uv_stride, int* a_stride);
|
||||
@@ -368,7 +375,7 @@ static WEBP_INLINE uint8_t* WebPIDecGetYUV(
|
||||
// Returns NULL in case the incremental decoder object is in an invalid state.
|
||||
// Otherwise returns the pointer to the internal representation. This structure
|
||||
// is read-only, tied to WebPIDecoder's lifespan and should not be modified.
|
||||
WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
|
||||
WEBP_EXTERN const WebPDecBuffer* WebPIDecodedArea(
|
||||
const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -416,7 +423,7 @@ struct WebPBitstreamFeatures {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
|
||||
WEBP_EXTERN VP8StatusCode WebPGetFeaturesInternal(
|
||||
const uint8_t*, size_t, WebPBitstreamFeatures*, int);
|
||||
|
||||
// Retrieve features from the bitstream. The *features structure is filled
|
||||
@@ -424,6 +431,12 @@ WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
|
||||
// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
|
||||
// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
|
||||
// features from headers. Returns error in other cases.
|
||||
// Note: The following chunk sequences (before the raw VP8/VP8L data) are
|
||||
// considered valid by this function:
|
||||
// RIFF + VP8(L)
|
||||
// RIFF + VP8X + (optional chunks) + VP8(L)
|
||||
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
|
||||
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
|
||||
static WEBP_INLINE VP8StatusCode WebPGetFeatures(
|
||||
const uint8_t* data, size_t data_size,
|
||||
WebPBitstreamFeatures* features) {
|
||||
@@ -457,7 +470,7 @@ struct WebPDecoderConfig {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
|
||||
WEBP_EXTERN int WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
|
||||
|
||||
// Initialize the configuration as empty. This function must always be
|
||||
// called first, unless WebPGetFeatures() is to be called.
|
||||
@@ -477,17 +490,17 @@ static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
|
||||
// The return WebPIDecoder object must always be deleted calling WebPIDelete().
|
||||
// Returns NULL in case of error (and config->status will then reflect
|
||||
// the error condition, if available).
|
||||
WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
|
||||
WebPDecoderConfig* config);
|
||||
WEBP_EXTERN WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
|
||||
WebPDecoderConfig* config);
|
||||
|
||||
// Non-incremental version. This version decodes the full data at once, taking
|
||||
// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
|
||||
// if the decoding was successful). Note that 'config' cannot be NULL.
|
||||
WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
|
||||
WebPDecoderConfig* config);
|
||||
WEBP_EXTERN VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
|
||||
WebPDecoderConfig* config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_DECODE_H_ */
|
||||
#endif // WEBP_WEBP_DECODE_H_
|
||||
|
||||
+37
-32
@@ -71,7 +71,7 @@ typedef struct WebPAnimDecoderOptions WebPAnimDecoderOptions;
|
||||
|
||||
// Returns the version number of the demux library, packed in hexadecimal using
|
||||
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetDemuxVersion(void);
|
||||
WEBP_EXTERN int WebPGetDemuxVersion(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Life of a Demux object
|
||||
@@ -85,7 +85,7 @@ typedef enum WebPDemuxState {
|
||||
} WebPDemuxState;
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
|
||||
WEBP_EXTERN WebPDemuxer* WebPDemuxInternal(
|
||||
const WebPData*, int, WebPDemuxState*, int);
|
||||
|
||||
// Parses the full WebP file given by 'data'. For single images the WebP file
|
||||
@@ -109,27 +109,32 @@ static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
|
||||
}
|
||||
|
||||
// Frees memory associated with 'dmux'.
|
||||
WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
|
||||
WEBP_EXTERN void WebPDemuxDelete(WebPDemuxer* dmux);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Data/information extraction.
|
||||
|
||||
typedef enum WebPFormatFeature {
|
||||
WEBP_FF_FORMAT_FLAGS, // Extended format flags present in the 'VP8X' chunk.
|
||||
WEBP_FF_FORMAT_FLAGS, // bit-wise combination of WebPFeatureFlags
|
||||
// corresponding to the 'VP8X' chunk (if present).
|
||||
WEBP_FF_CANVAS_WIDTH,
|
||||
WEBP_FF_CANVAS_HEIGHT,
|
||||
WEBP_FF_LOOP_COUNT,
|
||||
WEBP_FF_BACKGROUND_COLOR,
|
||||
WEBP_FF_FRAME_COUNT // Number of frames present in the demux object.
|
||||
// In case of a partial demux, this is the number of
|
||||
// frames seen so far, with the last frame possibly
|
||||
// being partial.
|
||||
WEBP_FF_LOOP_COUNT, // only relevant for animated file
|
||||
WEBP_FF_BACKGROUND_COLOR, // idem.
|
||||
WEBP_FF_FRAME_COUNT // Number of frames present in the demux object.
|
||||
// In case of a partial demux, this is the number
|
||||
// of frames seen so far, with the last frame
|
||||
// possibly being partial.
|
||||
} WebPFormatFeature;
|
||||
|
||||
// Get the 'feature' value from the 'dmux'.
|
||||
// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
|
||||
// returned a state > WEBP_DEMUX_PARSING_HEADER.
|
||||
WEBP_EXTERN(uint32_t) WebPDemuxGetI(
|
||||
// If 'feature' is WEBP_FF_FORMAT_FLAGS, the returned value is a bit-wise
|
||||
// combination of WebPFeatureFlags values.
|
||||
// If 'feature' is WEBP_FF_LOOP_COUNT, WEBP_FF_BACKGROUND_COLOR, the returned
|
||||
// value is only meaningful if the bitstream is animated.
|
||||
WEBP_EXTERN uint32_t WebPDemuxGetI(
|
||||
const WebPDemuxer* dmux, WebPFormatFeature feature);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -159,20 +164,20 @@ struct WebPIterator {
|
||||
// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
|
||||
// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
|
||||
// NOTE: 'dmux' must persist for the lifetime of 'iter'.
|
||||
WEBP_EXTERN(int) WebPDemuxGetFrame(
|
||||
WEBP_EXTERN int WebPDemuxGetFrame(
|
||||
const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
|
||||
|
||||
// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
|
||||
// previous ('iter->frame_num' - 1) frame. These functions do not loop.
|
||||
// Returns true on success, false otherwise.
|
||||
WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
|
||||
WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
|
||||
WEBP_EXTERN int WebPDemuxNextFrame(WebPIterator* iter);
|
||||
WEBP_EXTERN int WebPDemuxPrevFrame(WebPIterator* iter);
|
||||
|
||||
// Releases any memory associated with 'iter'.
|
||||
// Must be called before any subsequent calls to WebPDemuxGetChunk() on the same
|
||||
// iter. Also, must be called before destroying the associated WebPDemuxer with
|
||||
// WebPDemuxDelete().
|
||||
WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
|
||||
WEBP_EXTERN void WebPDemuxReleaseIterator(WebPIterator* iter);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chunk iteration.
|
||||
@@ -197,20 +202,20 @@ struct WebPChunkIterator {
|
||||
// payloads are accessed through WebPDemuxGetFrame() and related functions.
|
||||
// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
|
||||
// NOTE: 'dmux' must persist for the lifetime of the iterator.
|
||||
WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
|
||||
const char fourcc[4], int chunk_number,
|
||||
WebPChunkIterator* iter);
|
||||
WEBP_EXTERN int WebPDemuxGetChunk(const WebPDemuxer* dmux,
|
||||
const char fourcc[4], int chunk_number,
|
||||
WebPChunkIterator* iter);
|
||||
|
||||
// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
|
||||
// ('iter->chunk_num' - 1) chunk. These functions do not loop.
|
||||
// Returns true on success, false otherwise.
|
||||
WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
|
||||
WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
|
||||
WEBP_EXTERN int WebPDemuxNextChunk(WebPChunkIterator* iter);
|
||||
WEBP_EXTERN int WebPDemuxPrevChunk(WebPChunkIterator* iter);
|
||||
|
||||
// Releases any memory associated with 'iter'.
|
||||
// Must be called before destroying the associated WebPDemuxer with
|
||||
// WebPDemuxDelete().
|
||||
WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
|
||||
WEBP_EXTERN void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPAnimDecoder API
|
||||
@@ -252,7 +257,7 @@ struct WebPAnimDecoderOptions {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point.
|
||||
WEBP_EXTERN(int) WebPAnimDecoderOptionsInitInternal(
|
||||
WEBP_EXTERN int WebPAnimDecoderOptionsInitInternal(
|
||||
WebPAnimDecoderOptions*, int);
|
||||
|
||||
// Should always be called, to initialize a fresh WebPAnimDecoderOptions
|
||||
@@ -266,7 +271,7 @@ static WEBP_INLINE int WebPAnimDecoderOptionsInit(
|
||||
}
|
||||
|
||||
// Internal, version-checked, entry point.
|
||||
WEBP_EXTERN(WebPAnimDecoder*) WebPAnimDecoderNewInternal(
|
||||
WEBP_EXTERN WebPAnimDecoder* WebPAnimDecoderNewInternal(
|
||||
const WebPData*, const WebPAnimDecoderOptions*, int);
|
||||
|
||||
// Creates and initializes a WebPAnimDecoder object.
|
||||
@@ -301,8 +306,8 @@ struct WebPAnimInfo {
|
||||
// info - (out) global information fetched from the animation.
|
||||
// Returns:
|
||||
// True on success.
|
||||
WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
|
||||
WebPAnimInfo* info);
|
||||
WEBP_EXTERN int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
|
||||
WebPAnimInfo* info);
|
||||
|
||||
// Fetch the next frame from 'dec' based on options supplied to
|
||||
// WebPAnimDecoderNew(). This will be a fully reconstructed canvas of size
|
||||
@@ -316,8 +321,8 @@ WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
|
||||
// Returns:
|
||||
// False if any of the arguments are NULL, or if there is a parsing or
|
||||
// decoding error, or if there are no more frames. Otherwise, returns true.
|
||||
WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
|
||||
uint8_t** buf, int* timestamp);
|
||||
WEBP_EXTERN int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
|
||||
uint8_t** buf, int* timestamp);
|
||||
|
||||
// Check if there are more frames left to decode.
|
||||
// Parameters:
|
||||
@@ -325,7 +330,7 @@ WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
|
||||
// Returns:
|
||||
// True if 'dec' is not NULL and some frames are yet to be decoded.
|
||||
// Otherwise, returns false.
|
||||
WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
|
||||
WEBP_EXTERN int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
|
||||
|
||||
// Resets the WebPAnimDecoder object, so that next call to
|
||||
// WebPAnimDecoderGetNext() will restart decoding from 1st frame. This would be
|
||||
@@ -333,7 +338,7 @@ WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
|
||||
// info.loop_count times) without destroying and recreating the 'dec' object.
|
||||
// Parameters:
|
||||
// dec - (in/out) decoder instance to be reset
|
||||
WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
|
||||
WEBP_EXTERN void WebPAnimDecoderReset(WebPAnimDecoder* dec);
|
||||
|
||||
// Grab the internal demuxer object.
|
||||
// Getting the demuxer object can be useful if one wants to use operations only
|
||||
@@ -343,16 +348,16 @@ WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
|
||||
//
|
||||
// Parameters:
|
||||
// dec - (in) decoder instance from which the demuxer object is to be fetched.
|
||||
WEBP_EXTERN(const WebPDemuxer*) WebPAnimDecoderGetDemuxer(
|
||||
WEBP_EXTERN const WebPDemuxer* WebPAnimDecoderGetDemuxer(
|
||||
const WebPAnimDecoder* dec);
|
||||
|
||||
// Deletes the WebPAnimDecoder object.
|
||||
// Parameters:
|
||||
// dec - (in/out) decoder instance to be deleted
|
||||
WEBP_EXTERN(void) WebPAnimDecoderDelete(WebPAnimDecoder* dec);
|
||||
WEBP_EXTERN void WebPAnimDecoderDelete(WebPAnimDecoder* dec);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_DEMUX_H_ */
|
||||
#endif // WEBP_WEBP_DEMUX_H_
|
||||
|
||||
+68
-65
@@ -35,7 +35,7 @@ typedef struct WebPMemoryWriter WebPMemoryWriter;
|
||||
|
||||
// Return the encoder's version number, packed in hexadecimal using 8bits for
|
||||
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetEncoderVersion(void);
|
||||
WEBP_EXTERN int WebPGetEncoderVersion(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// One-stop-shop call! No questions asked:
|
||||
@@ -46,37 +46,37 @@ WEBP_EXTERN(int) WebPGetEncoderVersion(void);
|
||||
// These functions compress using the lossy format, and the quality_factor
|
||||
// can go from 0 (smaller output, lower quality) to 100 (best quality,
|
||||
// larger output).
|
||||
WEBP_EXTERN(size_t) WebPEncodeRGB(const uint8_t* rgb,
|
||||
WEBP_EXTERN size_t WebPEncodeRGB(const uint8_t* rgb,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
WEBP_EXTERN size_t WebPEncodeBGR(const uint8_t* bgr,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
WEBP_EXTERN size_t WebPEncodeRGBA(const uint8_t* rgba,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeBGR(const uint8_t* bgr,
|
||||
WEBP_EXTERN size_t WebPEncodeBGRA(const uint8_t* bgra,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeRGBA(const uint8_t* rgba,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeBGRA(const uint8_t* bgra,
|
||||
int width, int height, int stride,
|
||||
float quality_factor, uint8_t** output);
|
||||
|
||||
// These functions are the equivalent of the above, but compressing in a
|
||||
// lossless manner. Files are usually larger than lossy format, but will
|
||||
// not suffer any compression loss.
|
||||
WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb,
|
||||
WEBP_EXTERN size_t WebPEncodeLosslessRGB(const uint8_t* rgb,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
WEBP_EXTERN size_t WebPEncodeLosslessBGR(const uint8_t* bgr,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
WEBP_EXTERN size_t WebPEncodeLosslessRGBA(const uint8_t* rgba,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr,
|
||||
WEBP_EXTERN size_t WebPEncodeLosslessBGRA(const uint8_t* bgra,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
|
||||
int width, int height, int stride,
|
||||
uint8_t** output);
|
||||
|
||||
// Releases memory returned by the WebPEncode*() functions above.
|
||||
WEBP_EXTERN(void) WebPFree(void* ptr);
|
||||
WEBP_EXTERN void WebPFree(void* ptr);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Coding parameters
|
||||
@@ -93,12 +93,15 @@ typedef enum WebPImageHint {
|
||||
// Compression parameters.
|
||||
struct WebPConfig {
|
||||
int lossless; // Lossless encoding (0=lossy(default), 1=lossless).
|
||||
float quality; // between 0 (smallest file) and 100 (biggest)
|
||||
float quality; // between 0 and 100. For lossy, 0 gives the smallest
|
||||
// size and 100 the largest. For lossless, this
|
||||
// parameter is the amount of effort put into the
|
||||
// compression: 0 is the fastest but gives larger
|
||||
// files compared to the slowest, but best, 100.
|
||||
int method; // quality/speed trade-off (0=fast, 6=slower-better)
|
||||
|
||||
WebPImageHint image_hint; // Hint for image type (lossless only for now).
|
||||
|
||||
// Parameters related to lossy compression only:
|
||||
int target_size; // if non-zero, set the desired target size in bytes.
|
||||
// Takes precedence over the 'compression' parameter.
|
||||
float target_PSNR; // if non-zero, specifies the minimal distortion to
|
||||
@@ -159,7 +162,7 @@ typedef enum WebPPreset {
|
||||
} WebPPreset;
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
|
||||
WEBP_EXTERN int WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
|
||||
|
||||
// Should always be called, to initialize a fresh WebPConfig structure before
|
||||
// modification. Returns false in case of version mismatch. WebPConfigInit()
|
||||
@@ -186,15 +189,15 @@ static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
|
||||
// speed and final compressed size.
|
||||
// This function will overwrite several fields from config: 'method', 'quality'
|
||||
// and 'lossless'. Returns false in case of parameter error.
|
||||
WEBP_EXTERN(int) WebPConfigLosslessPreset(WebPConfig* config, int level);
|
||||
WEBP_EXTERN int WebPConfigLosslessPreset(WebPConfig* config, int level);
|
||||
|
||||
// Returns true if 'config' is non-NULL and all configuration parameters are
|
||||
// within their valid ranges.
|
||||
WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
|
||||
WEBP_EXTERN int WebPValidateConfig(const WebPConfig* config);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Input / Output
|
||||
// Structure for storing auxiliary statistics (mostly for lossy encoding).
|
||||
// Structure for storing auxiliary statistics.
|
||||
|
||||
struct WebPAuxStats {
|
||||
int coded_size; // final size
|
||||
@@ -242,16 +245,16 @@ struct WebPMemoryWriter {
|
||||
};
|
||||
|
||||
// The following must be called first before any use.
|
||||
WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
|
||||
WEBP_EXTERN void WebPMemoryWriterInit(WebPMemoryWriter* writer);
|
||||
|
||||
// The following must be called to deallocate writer->mem memory. The 'writer'
|
||||
// object itself is not deallocated.
|
||||
WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer);
|
||||
WEBP_EXTERN void WebPMemoryWriterClear(WebPMemoryWriter* writer);
|
||||
// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
|
||||
// completion, writer.mem and writer.size will hold the coded data.
|
||||
// writer.mem must be freed by calling WebPMemoryWriterClear.
|
||||
WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
|
||||
const WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPMemoryWrite(const uint8_t* data, size_t data_size,
|
||||
const WebPPicture* picture);
|
||||
|
||||
// Progress hook, called from time to time to report progress. It can return
|
||||
// false to request an abort of the encoding process, or true otherwise if
|
||||
@@ -354,7 +357,7 @@ struct WebPPicture {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
|
||||
WEBP_EXTERN int WebPPictureInitInternal(WebPPicture*, int);
|
||||
|
||||
// Should always be called, to initialize the structure. Returns false in case
|
||||
// of version mismatch. WebPPictureInit() must have succeeded before using the
|
||||
@@ -371,20 +374,20 @@ static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
|
||||
// Allocate y/u/v buffers as per colorspace/width/height specification.
|
||||
// Note! This function will free the previous buffer if needed.
|
||||
// Returns false in case of memory error.
|
||||
WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureAlloc(WebPPicture* picture);
|
||||
|
||||
// Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
|
||||
// Note that this function does _not_ free the memory used by the 'picture'
|
||||
// object itself.
|
||||
// Besides memory (which is reclaimed) all other fields of 'picture' are
|
||||
// preserved.
|
||||
WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
|
||||
WEBP_EXTERN void WebPPictureFree(WebPPicture* picture);
|
||||
|
||||
// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
|
||||
// will fully own the copied pixels (this is not a view). The 'dst' picture need
|
||||
// not be initialized as its content is overwritten.
|
||||
// Returns false in case of memory allocation error.
|
||||
WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
|
||||
WEBP_EXTERN int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
|
||||
|
||||
// Compute the single distortion for packed planes of samples.
|
||||
// 'src' will be compared to 'ref', and the raw distortion stored into
|
||||
@@ -393,19 +396,19 @@ WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
|
||||
// 'x_step' is the horizontal stride (in bytes) between samples.
|
||||
// 'src/ref_stride' is the byte distance between rows.
|
||||
// Returns false in case of error (bad parameter, memory allocation error, ...).
|
||||
WEBP_EXTERN(int) WebPPlaneDistortion(const uint8_t* src, size_t src_stride,
|
||||
const uint8_t* ref, size_t ref_stride,
|
||||
int width, int height,
|
||||
size_t x_step,
|
||||
int type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
|
||||
float* distortion, float* result);
|
||||
WEBP_EXTERN int WebPPlaneDistortion(const uint8_t* src, size_t src_stride,
|
||||
const uint8_t* ref, size_t ref_stride,
|
||||
int width, int height,
|
||||
size_t x_step,
|
||||
int type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
|
||||
float* distortion, float* result);
|
||||
|
||||
// Compute PSNR, SSIM or LSIM distortion metric between two pictures. Results
|
||||
// are in dB, stored in result[] in the B/G/R/A/All order. The distortion is
|
||||
// always performed using ARGB samples. Hence if the input is YUV(A), the
|
||||
// picture will be internally converted to ARGB (just for the measurement).
|
||||
// Warning: this function is rather CPU-intensive.
|
||||
WEBP_EXTERN(int) WebPPictureDistortion(
|
||||
WEBP_EXTERN int WebPPictureDistortion(
|
||||
const WebPPicture* src, const WebPPicture* ref,
|
||||
int metric_type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
|
||||
float result[5]);
|
||||
@@ -418,8 +421,8 @@ WEBP_EXTERN(int) WebPPictureDistortion(
|
||||
// must be fully be comprised inside the 'src' source picture. If the source
|
||||
// picture uses the YUV420 colorspace, the top and left coordinates will be
|
||||
// snapped to even values.
|
||||
WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
|
||||
int left, int top, int width, int height);
|
||||
WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
// Extracts a view from 'src' picture into 'dst'. The rectangle for the view
|
||||
// is defined by the top-left corner pixel coordinates (left, top) as well
|
||||
@@ -432,42 +435,42 @@ WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
|
||||
// with WebPPictureInit() if it is different from 'src', since its content will
|
||||
// be overwritten.
|
||||
// Returns false in case of memory allocation error or invalid parameters.
|
||||
WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
|
||||
int left, int top, int width, int height,
|
||||
WebPPicture* dst);
|
||||
WEBP_EXTERN int WebPPictureView(const WebPPicture* src,
|
||||
int left, int top, int width, int height,
|
||||
WebPPicture* dst);
|
||||
|
||||
// Returns true if the 'picture' is actually a view and therefore does
|
||||
// not own the memory for pixels.
|
||||
WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture);
|
||||
|
||||
// Rescale a picture to new dimension width x height.
|
||||
// If either 'width' or 'height' (but not both) is 0 the corresponding
|
||||
// dimension will be calculated preserving the aspect ratio.
|
||||
// No gamma correction is applied.
|
||||
// Returns false in case of error (invalid parameter or insufficient memory).
|
||||
WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
|
||||
WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height);
|
||||
|
||||
// Colorspace conversion function to import RGB samples.
|
||||
// Previous buffer will be free'd, if any.
|
||||
// *rgb buffer should have a size of at least height * rgb_stride.
|
||||
// Returns false in case of memory error.
|
||||
WEBP_EXTERN(int) WebPPictureImportRGB(
|
||||
WEBP_EXTERN int WebPPictureImportRGB(
|
||||
WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
|
||||
// Same, but for RGBA buffer.
|
||||
WEBP_EXTERN(int) WebPPictureImportRGBA(
|
||||
WEBP_EXTERN int WebPPictureImportRGBA(
|
||||
WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
|
||||
// Same, but for RGBA buffer. Imports the RGB direct from the 32-bit format
|
||||
// input buffer ignoring the alpha channel. Avoids needing to copy the data
|
||||
// to a temporary 24-bit RGB buffer to import the RGB only.
|
||||
WEBP_EXTERN(int) WebPPictureImportRGBX(
|
||||
WEBP_EXTERN int WebPPictureImportRGBX(
|
||||
WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
|
||||
|
||||
// Variants of the above, but taking BGR(A|X) input.
|
||||
WEBP_EXTERN(int) WebPPictureImportBGR(
|
||||
WEBP_EXTERN int WebPPictureImportBGR(
|
||||
WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
|
||||
WEBP_EXTERN(int) WebPPictureImportBGRA(
|
||||
WEBP_EXTERN int WebPPictureImportBGRA(
|
||||
WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
|
||||
WEBP_EXTERN(int) WebPPictureImportBGRX(
|
||||
WEBP_EXTERN int WebPPictureImportBGRX(
|
||||
WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
|
||||
|
||||
// Converts picture->argb data to the YUV420A format. The 'colorspace'
|
||||
@@ -476,14 +479,14 @@ WEBP_EXTERN(int) WebPPictureImportBGRX(
|
||||
// non-opaque transparent values is detected, and 'colorspace' will be
|
||||
// adjusted accordingly. Note that this method is lossy.
|
||||
// Returns false in case of error.
|
||||
WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
|
||||
WebPEncCSP /*colorspace = WEBP_YUV420*/);
|
||||
WEBP_EXTERN int WebPPictureARGBToYUVA(WebPPicture* picture,
|
||||
WebPEncCSP /*colorspace = WEBP_YUV420*/);
|
||||
|
||||
// Same as WebPPictureARGBToYUVA(), but the conversion is done using
|
||||
// pseudo-random dithering with a strength 'dithering' between
|
||||
// 0.0 (no dithering) and 1.0 (maximum dithering). This is useful
|
||||
// for photographic picture.
|
||||
WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
|
||||
WEBP_EXTERN int WebPPictureARGBToYUVADithered(
|
||||
WebPPicture* picture, WebPEncCSP colorspace, float dithering);
|
||||
|
||||
// Performs 'sharp' RGBA->YUVA420 downsampling and colorspace conversion.
|
||||
@@ -491,9 +494,9 @@ WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
|
||||
// method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better
|
||||
// and sharper YUV representation.
|
||||
// Returns false in case of error.
|
||||
WEBP_EXTERN(int) WebPPictureSharpARGBToYUVA(WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureSharpARGBToYUVA(WebPPicture* picture);
|
||||
// kept for backward compatibility:
|
||||
WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureSmartARGBToYUVA(WebPPicture* picture);
|
||||
|
||||
// Converts picture->yuv to picture->argb and sets picture->use_argb to true.
|
||||
// The input format must be YUV_420 or YUV_420A. The conversion from YUV420 to
|
||||
@@ -501,22 +504,22 @@ WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
|
||||
// Note that the use of this colorspace is discouraged if one has access to the
|
||||
// raw ARGB samples, since using YUV420 is comparatively lossy.
|
||||
// Returns false in case of error.
|
||||
WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureYUVAToARGB(WebPPicture* picture);
|
||||
|
||||
// Helper function: given a width x height plane of RGBA or YUV(A) samples
|
||||
// clean-up the YUV or RGB samples under fully transparent area, to help
|
||||
// compressibility (no guarantee, though).
|
||||
WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
|
||||
// clean-up or smoothen the YUV or RGB samples under fully transparent area,
|
||||
// to help compressibility (no guarantee, though).
|
||||
WEBP_EXTERN void WebPCleanupTransparentArea(WebPPicture* picture);
|
||||
|
||||
// Scan the picture 'picture' for the presence of non fully opaque alpha values.
|
||||
// Returns true in such case. Otherwise returns false (indicating that the
|
||||
// alpha plane can be ignored altogether e.g.).
|
||||
WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture);
|
||||
|
||||
// Remove the transparency information (if present) by blending the color with
|
||||
// the background color 'background_rgb' (specified as 24bit RGB triplet).
|
||||
// After this call, all alpha values are reset to 0xff.
|
||||
WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
|
||||
WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main call
|
||||
@@ -531,7 +534,7 @@ WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
|
||||
// the former for lossy encoding, and the latter for lossless encoding
|
||||
// (when config.lossless is true). Automatic conversion from one format to
|
||||
// another is provided but they both incur some loss.
|
||||
WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPEncode(const WebPConfig* config, WebPPicture* picture);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -539,4 +542,4 @@ WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_ENCODE_H_ */
|
||||
#endif // WEBP_WEBP_ENCODE_H_
|
||||
|
||||
@@ -84,4 +84,4 @@ typedef enum {
|
||||
// overflow a uint32_t.
|
||||
#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1)
|
||||
|
||||
#endif /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */
|
||||
#endif // WEBP_WEBP_FORMAT_CONSTANTS_H_
|
||||
|
||||
+31
-31
@@ -98,13 +98,13 @@ typedef enum WebPChunkId {
|
||||
|
||||
// Returns the version number of the mux library, packed in hexadecimal using
|
||||
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetMuxVersion(void);
|
||||
WEBP_EXTERN int WebPGetMuxVersion(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Life of a Mux object
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
|
||||
WEBP_EXTERN WebPMux* WebPNewInternal(int);
|
||||
|
||||
// Creates an empty mux object.
|
||||
// Returns:
|
||||
@@ -117,13 +117,13 @@ static WEBP_INLINE WebPMux* WebPMuxNew(void) {
|
||||
// Deletes the mux object.
|
||||
// Parameters:
|
||||
// mux - (in/out) object to be deleted
|
||||
WEBP_EXTERN(void) WebPMuxDelete(WebPMux* mux);
|
||||
WEBP_EXTERN void WebPMuxDelete(WebPMux* mux);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Mux creation.
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
|
||||
WEBP_EXTERN WebPMux* WebPMuxCreateInternal(const WebPData*, int, int);
|
||||
|
||||
// Creates a mux object from raw data given in WebP RIFF format.
|
||||
// Parameters:
|
||||
@@ -160,7 +160,7 @@ static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
|
||||
// or if fourcc corresponds to an image chunk.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxSetChunk(
|
||||
WebPMux* mux, const char fourcc[4], const WebPData* chunk_data,
|
||||
int copy_data);
|
||||
|
||||
@@ -176,7 +176,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
|
||||
// or if fourcc corresponds to an image chunk.
|
||||
// WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxGetChunk(
|
||||
const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
|
||||
|
||||
// Deletes the chunk with the given 'fourcc' from the mux object.
|
||||
@@ -189,7 +189,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
|
||||
// or if fourcc corresponds to an image chunk.
|
||||
// WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxDeleteChunk(
|
||||
WebPMux* mux, const char fourcc[4]);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -222,7 +222,7 @@ struct WebPMuxFrameInfo {
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxSetImage(
|
||||
WebPMux* mux, const WebPData* bitstream, int copy_data);
|
||||
|
||||
// Adds a frame at the end of the mux object.
|
||||
@@ -241,7 +241,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
|
||||
// or if content of 'frame' is invalid.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxPushFrame(
|
||||
WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
|
||||
|
||||
// Gets the nth frame from the mux object.
|
||||
@@ -259,7 +259,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
|
||||
// WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxGetFrame(
|
||||
const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
|
||||
|
||||
// Deletes a frame from the mux object.
|
||||
@@ -272,7 +272,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
|
||||
// WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
|
||||
// before deletion.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Animation.
|
||||
@@ -296,7 +296,7 @@ struct WebPMuxAnimParams {
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxSetAnimationParams(
|
||||
WebPMux* mux, const WebPMuxAnimParams* params);
|
||||
|
||||
// Gets the animation parameters from the mux object.
|
||||
@@ -307,7 +307,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
|
||||
// WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
|
||||
WEBP_EXTERN WebPMuxError WebPMuxGetAnimationParams(
|
||||
const WebPMux* mux, WebPMuxAnimParams* params);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -328,8 +328,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux is NULL; or
|
||||
// width or height are invalid or out of bounds
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
|
||||
int width, int height);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxSetCanvasSize(WebPMux* mux,
|
||||
int width, int height);
|
||||
|
||||
// Gets the canvas size from the mux object.
|
||||
// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
|
||||
@@ -343,8 +343,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux, width or height is NULL.
|
||||
// WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
|
||||
int* width, int* height);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxGetCanvasSize(const WebPMux* mux,
|
||||
int* width, int* height);
|
||||
|
||||
// Gets the feature flags from the mux object.
|
||||
// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
|
||||
@@ -359,8 +359,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL.
|
||||
// WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
|
||||
uint32_t* flags);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxGetFeatures(const WebPMux* mux,
|
||||
uint32_t* flags);
|
||||
|
||||
// Gets number of chunks with the given 'id' in the mux object.
|
||||
// Parameters:
|
||||
@@ -370,8 +370,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
|
||||
// Returns:
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux, or num_elements is NULL.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
|
||||
WebPChunkId id, int* num_elements);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxNumChunks(const WebPMux* mux,
|
||||
WebPChunkId id, int* num_elements);
|
||||
|
||||
// Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
|
||||
// This function also validates the mux object.
|
||||
@@ -388,8 +388,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
|
||||
// WEBP_MUX_INVALID_ARGUMENT - if mux or assembled_data is NULL.
|
||||
// WEBP_MUX_MEMORY_ERROR - on memory allocation error.
|
||||
// WEBP_MUX_OK - on success.
|
||||
WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
|
||||
WebPData* assembled_data);
|
||||
WEBP_EXTERN WebPMuxError WebPMuxAssemble(WebPMux* mux,
|
||||
WebPData* assembled_data);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPAnimEncoder API
|
||||
@@ -442,7 +442,7 @@ struct WebPAnimEncoderOptions {
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point.
|
||||
WEBP_EXTERN(int) WebPAnimEncoderOptionsInitInternal(
|
||||
WEBP_EXTERN int WebPAnimEncoderOptionsInitInternal(
|
||||
WebPAnimEncoderOptions*, int);
|
||||
|
||||
// Should always be called, to initialize a fresh WebPAnimEncoderOptions
|
||||
@@ -455,7 +455,7 @@ static WEBP_INLINE int WebPAnimEncoderOptionsInit(
|
||||
}
|
||||
|
||||
// Internal, version-checked, entry point.
|
||||
WEBP_EXTERN(WebPAnimEncoder*) WebPAnimEncoderNewInternal(
|
||||
WEBP_EXTERN WebPAnimEncoder* WebPAnimEncoderNewInternal(
|
||||
int, int, const WebPAnimEncoderOptions*, int);
|
||||
|
||||
// Creates and initializes a WebPAnimEncoder object.
|
||||
@@ -490,7 +490,7 @@ static WEBP_INLINE WebPAnimEncoder* WebPAnimEncoderNew(
|
||||
// Returns:
|
||||
// On error, returns false and frame->error_code is set appropriately.
|
||||
// Otherwise, returns true.
|
||||
WEBP_EXTERN(int) WebPAnimEncoderAdd(
|
||||
WEBP_EXTERN int WebPAnimEncoderAdd(
|
||||
WebPAnimEncoder* enc, struct WebPPicture* frame, int timestamp_ms,
|
||||
const struct WebPConfig* config);
|
||||
|
||||
@@ -503,8 +503,8 @@ WEBP_EXTERN(int) WebPAnimEncoderAdd(
|
||||
// webp_data - (out) generated WebP bitstream.
|
||||
// Returns:
|
||||
// True on success.
|
||||
WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
|
||||
WebPData* webp_data);
|
||||
WEBP_EXTERN int WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
|
||||
WebPData* webp_data);
|
||||
|
||||
// Get error string corresponding to the most recent call using 'enc'. The
|
||||
// returned string is owned by 'enc' and is valid only until the next call to
|
||||
@@ -514,12 +514,12 @@ WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
|
||||
// Returns:
|
||||
// NULL if 'enc' is NULL. Otherwise, returns the error string if the last call
|
||||
// to 'enc' had an error, or an empty string if the last call was a success.
|
||||
WEBP_EXTERN(const char*) WebPAnimEncoderGetError(WebPAnimEncoder* enc);
|
||||
WEBP_EXTERN const char* WebPAnimEncoderGetError(WebPAnimEncoder* enc);
|
||||
|
||||
// Deletes the WebPAnimEncoder object.
|
||||
// Parameters:
|
||||
// enc - (in/out) object to be deleted
|
||||
WEBP_EXTERN(void) WebPAnimEncoderDelete(WebPAnimEncoder* enc);
|
||||
WEBP_EXTERN void WebPAnimEncoderDelete(WebPAnimEncoder* enc);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -527,4 +527,4 @@ WEBP_EXTERN(void) WebPAnimEncoderDelete(WebPAnimEncoder* enc);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_MUX_H_ */
|
||||
#endif // WEBP_WEBP_MUX_H_
|
||||
|
||||
@@ -95,4 +95,4 @@ static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_MUX_TYPES_H_ */
|
||||
#endif // WEBP_WEBP_MUX_TYPES_H_
|
||||
|
||||
@@ -40,13 +40,13 @@ typedef long long int int64_t;
|
||||
// This explicitly marks library functions and allows for changing the
|
||||
// signature for e.g., Windows DLL builds.
|
||||
# if defined(__GNUC__) && __GNUC__ >= 4
|
||||
# define WEBP_EXTERN(type) extern __attribute__ ((visibility ("default"))) type
|
||||
# define WEBP_EXTERN extern __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define WEBP_EXTERN(type) extern type
|
||||
# define WEBP_EXTERN extern
|
||||
# endif /* __GNUC__ >= 4 */
|
||||
#endif /* WEBP_EXTERN */
|
||||
|
||||
// Macro to check ABI compatibility (same major revision number)
|
||||
#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
|
||||
|
||||
#endif /* WEBP_WEBP_TYPES_H_ */
|
||||
#endif // WEBP_WEBP_TYPES_H_
|
||||
|
||||
Reference in New Issue
Block a user