mirror of
https://github.com/roytam1/palemoon27.git
synced 2026-05-26 14:18:48 +00:00
Issue #546 - Update Tycho to libvpx 1.4 - Part 1: Update the lib
This commit is contained in:
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
|
||||
enforcement activity against any entity (including a cross-claim or
|
||||
counterclaim in a lawsuit) alleging that any of these implementations of WebM
|
||||
or any code incorporated within any of these implementations of WebM
|
||||
constitutes direct or contributory patent infringement, or inducement of
|
||||
constitute direct or contributory patent infringement, or inducement of
|
||||
patent infringement, then any patent rights granted to you under this License
|
||||
for these implementations of WebM shall terminate as of the date such
|
||||
litigation is filed.
|
||||
|
||||
@@ -6,6 +6,6 @@ Mozilla build system.
|
||||
|
||||
The libvpx git repository is:
|
||||
|
||||
https://gerrit.chromium.org/gerrit/webm/libvpx
|
||||
https://chromium.googlesource.com/webm/libvpx
|
||||
|
||||
The git commit ID used was 587ff646f
|
||||
The git commit ID used was e67d45d4ce92468ba193288b59093fef0a502662
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
|
||||
--- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
|
||||
+++ b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
|
||||
@@ -27,21 +27,24 @@ DECLARE_ALIGNED(32, static const uint8_t
|
||||
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
|
||||
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
|
||||
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
|
||||
};
|
||||
|
||||
#if defined(__clang__)
|
||||
# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \
|
||||
- (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0)
|
||||
+ (defined(__APPLE__) && \
|
||||
+ (__clang_major__ == 4 && __clang_minor__ >= 0 && \
|
||||
+ __clang_minor__ <= 2) || \
|
||||
+ (__clang_major__ == 5 && __clang_minor__ == 0))
|
||||
# define MM256_BROADCASTSI128_SI256(x) \
|
||||
_mm_broadcastsi128_si256((__m128i const *)&(x))
|
||||
# else // clang > 3.3, and not 5.0 on macosx.
|
||||
# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
|
||||
# endif // clang <= 3.3
|
||||
#elif defined(__GNUC__)
|
||||
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6)
|
||||
# define MM256_BROADCASTSI128_SI256(x) \
|
||||
_mm_broadcastsi128_si256((__m128i const *)&(x))
|
||||
# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7
|
||||
@@ -1,857 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Output dialects in which extracted symbol/value pairs can be printed. */
typedef enum {
  OUTPUT_FMT_PLAIN = 0,    /* fallback when no format argument is given */
  OUTPUT_FMT_RVDS = 1,     /* "name EQU value" lines (armasm) */
  OUTPUT_FMT_GAS = 2,      /* GNU assembler directives */
  OUTPUT_FMT_C_HEADER = 3  /* "#define name value" lines */
} output_fmt_t;
|
||||
|
||||
/*
 * printf-style diagnostic logger; everything goes to stderr.
 *
 * fmt  printf format string followed by its arguments.
 *
 * Returns whatever vfprintf() returns (number of characters written, or a
 * negative value on output error). A NULL format is rejected with -1
 * instead of being handed to vfprintf(), which would be undefined behaviour.
 */
int log_msg(const char *fmt, ...) {
  int res;
  va_list ap;

  if (fmt == NULL)
    return -1;

  va_start(ap, fmt);
  res = vfprintf(stderr, fmt, ap);
  va_end(ap);
  return res;
}
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
|
||||
#if defined(FORCE_PARSE_ELF)
|
||||
|
||||
#if defined(__MACH__)
|
||||
#undef __MACH__
|
||||
#endif
|
||||
|
||||
#if !defined(__ELF__)
|
||||
#define __ELF__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__MACH__)
|
||||
|
||||
#include <mach-o/loader.h>
|
||||
#include <mach-o/nlist.h>
|
||||
|
||||
/*
 * Print one symbol/value pair on stdout in the requested output format.
 *
 * mode  one of OUTPUT_FMT_RVDS, OUTPUT_FMT_GAS or OUTPUT_FMT_C_HEADER.
 * name  NUL-terminated symbol name.
 * val   value to associate with the name.
 *
 * Returns 0 on success, 1 (after logging) when the mode is not supported.
 */
int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
  /* %s expects a char pointer; convert once instead of passing uint8_t *. */
  const char *sym = (const char *)name;

  switch (mode) {
    case OUTPUT_FMT_RVDS:
      printf("%-40s EQU %5d\n", sym, val);
      return 0;
    case OUTPUT_FMT_GAS:
      printf(".set %-40s, %5d\n", sym, val);
      return 0;
    case OUTPUT_FMT_C_HEADER:
      printf("#define %-40s %5d\n", sym, val);
      return 0;
    default:
      /* Terminate the line so repeated diagnostics do not run together. */
      log_msg("Unsupported mode: %d\n", (int)mode);
      return 1;
  }
}
|
||||
|
||||
/* Non-zero when the byte range [off, off + len) lies inside sz bytes. */
static int macho_range_ok(uint64_t off, uint64_t len, size_t sz) {
  return len <= sz && off <= sz - len;
}

/*
 * Walk the load commands of a Mach-O object file held in memory and print
 * every symbol table entry as a name/value pair via print_macho_equ().
 *
 * base_buf  start of the file image.
 * sz        size of the image in bytes; every read is checked against it.
 * mode      output format forwarded to print_macho_equ().
 *
 * Returns 0 on success, 1 after logging a diagnostic when the file is not a
 * supported MH_OBJECT or any structure would extend past the end of the image.
 */
int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
  uint32_t i, j;
  struct mach_header header;
  uint64_t cmd_off = 0;            /* file offset of the current load command */
  uint32_t base_data_section = 0;  /* file offset of the first section seen */
  int bits = 0;

  /* We can read in mach_header for 32 and 64 bit architectures
   * because it's identical to mach_header_64 except for the last
   * element (uint32_t reserved), which we don't use. Then, when
   * we know which architecture we're looking at, skip the header
   * of the appropriate size.
   */
  if (!macho_range_ok(0, sizeof(struct mach_header), sz))
    goto truncated;
  memcpy(&header, base_buf, sizeof(struct mach_header));

  if (header.magic == MH_MAGIC) {
    if (header.cputype == CPU_TYPE_ARM
        || header.cputype == CPU_TYPE_X86) {
      bits = 32;
      cmd_off = sizeof(struct mach_header);
    } else {
      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
      goto bail;
    }
  } else if (header.magic == MH_MAGIC_64) {
    if (header.cputype == CPU_TYPE_X86_64) {
      bits = 64;
      cmd_off = sizeof(struct mach_header_64);
    } else {
      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
      goto bail;
    }
  } else {
    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
            MH_MAGIC, MH_MAGIC_64, header.magic);
    goto bail;
  }

  if (header.filetype != MH_OBJECT) {
    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
    goto bail;
  }

  for (i = 0; i < header.ncmds; i++) {
    struct load_command lc;

    if (!macho_range_ok(cmd_off, sizeof(lc), sz))
      goto truncated;
    memcpy(&lc, base_buf + cmd_off, sizeof(lc));

    /* A command smaller than its own header cannot be stepped over. */
    if (lc.cmdsize < sizeof(lc)) {
      log_msg("Load command %u has an invalid size.\n", (unsigned int)i);
      goto bail;
    }

    if (lc.cmd == LC_SEGMENT) {
      struct section s;
      struct segment_command seg_c;

      if (!macho_range_ok(cmd_off, sizeof(seg_c), sz))
        goto truncated;
      memcpy(&seg_c, base_buf + cmd_off, sizeof(seg_c));

      /* Although each section is given its own offset, nlist.n_value
       * references the offset of the first section. This isn't
       * apparent without debug information because the offset of the
       * data section is the same as the first section. However, with
       * debug sections mixed in, the offset of the debug section
       * increases but n_value still references the first section.
       */
      if (seg_c.nsects < 1) {
        log_msg("Not enough sections\n");
        goto bail;
      }

      if (!macho_range_ok(cmd_off + sizeof(seg_c), sizeof(s), sz))
        goto truncated;
      memcpy(&s, base_buf + cmd_off + sizeof(seg_c), sizeof(s));
      base_data_section = s.offset;
    } else if (lc.cmd == LC_SEGMENT_64) {
      struct section_64 s;
      struct segment_command_64 seg_c;

      if (!macho_range_ok(cmd_off, sizeof(seg_c), sz))
        goto truncated;
      memcpy(&seg_c, base_buf + cmd_off, sizeof(seg_c));

      /* Explanation in LC_SEGMENT */
      if (seg_c.nsects < 1) {
        log_msg("Not enough sections\n");
        goto bail;
      }

      if (!macho_range_ok(cmd_off + sizeof(seg_c), sizeof(s), sz))
        goto truncated;
      memcpy(&s, base_buf + cmd_off + sizeof(seg_c), sizeof(s));
      base_data_section = s.offset;
    } else if (lc.cmd == LC_SYMTAB) {
      if (base_data_section != 0) {
        struct symtab_command sc;
        uint64_t sym_off;

        if (!macho_range_ok(cmd_off, sizeof(sc), sz))
          goto truncated;
        memcpy(&sc, base_buf + cmd_off, sizeof(sc));

        if (sc.cmdsize != sizeof(struct symtab_command)) {
          log_msg("Can't find symbol table!\n");
          goto bail;
        }

        sym_off = sc.symoff;

        for (j = 0; j < sc.nsyms; j++) {
          /* Location of string is calculated each time from the
           * start of the string buffer. On darwin the symbols
           * are prefixed by "_", so we bump the offset by 1.
           * The target value is defined as an int in *_asm_*_offsets.c,
           * which is 4 bytes on all targets we currently use.
           */
          uint64_t value, name_off;
          uint32_t strx;
          int val;

          if (bits == 32) {
            struct nlist nl;

            if (!macho_range_ok(sym_off, sizeof(nl), sz))
              goto truncated;
            memcpy(&nl, base_buf + sym_off, sizeof(nl));
            sym_off += sizeof(nl);

            strx = (uint32_t)nl.n_un.n_strx;
            value = nl.n_value;
          } else { /* if (bits == 64) */
            struct nlist_64 nl;

            if (!macho_range_ok(sym_off, sizeof(nl), sz))
              goto truncated;
            memcpy(&nl, base_buf + sym_off, sizeof(nl));
            sym_off += sizeof(nl);

            strx = (uint32_t)nl.n_un.n_strx;
            value = nl.n_value;
          }

          /* Check value alone first so the sum below cannot wrap. */
          if (value > sz ||
              !macho_range_ok(base_data_section + value, sizeof(val), sz))
            goto truncated;
          memcpy(&val, base_buf + base_data_section + value, sizeof(val));

          /* The name must start and be NUL-terminated inside the image. */
          name_off = (uint64_t)sc.stroff + strx + 1;
          if (name_off >= sz ||
              memchr(base_buf + name_off, '\0', sz - (size_t)name_off) == NULL)
            goto truncated;

          /* The result is deliberately not propagated: as before, an
           * unsupported mode is only reported, it does not abort parsing.
           */
          (void)print_macho_equ(mode, base_buf + name_off, val);
        }
      }
    }

    cmd_off += lc.cmdsize;
  }

  return 0;
truncated:
  log_msg("Mach-O structure extends past the end of the file.\n");
bail:
  return 1;
}
|
||||
|
||||
#elif defined(__ELF__)
|
||||
#include "elf.h"
|
||||
|
||||
/*
 * Copy sizeof(*dst) bytes from buf + ofst into dst, jumping to the enclosing
 * function's `bail` label when the source range is not inside sz bytes.
 * The check is written as a subtraction so a huge ofst cannot wrap the sum
 * and slip past it. Arguments may be evaluated more than once.
 */
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
    if ((sz) < sizeof(*(dst)) || (ofst) > (sz) - sizeof(*(dst))) goto bail;\
    memcpy((dst), (buf) + (ofst), sizeof(*(dst)));\
  } while (0)

/*
 * Assign memb to val. Only little-endian data is handled: when the `elf`
 * object in scope is not flagged le_data, log and jump to `bail`.
 */
#define ENDIAN_ASSIGN(val, memb) do {\
    if (!elf->le_data) {log_msg("Big Endian data not supported yet!\n"); goto bail;}\
    (val) = (memb);\
  } while (0)

/* Same as ENDIAN_ASSIGN, with the member as both source and destination. */
#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
    ENDIAN_ASSIGN(memb, memb);\
  } while (0)
|
||||
|
||||
typedef struct {
|
||||
uint8_t *buf; /* Buffer containing ELF data */
|
||||
size_t sz; /* Buffer size */
|
||||
int le_data; /* Data is little-endian */
|
||||
unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
|
||||
int bits; /* 32 or 64 */
|
||||
Elf32_Ehdr hdr32;
|
||||
Elf64_Ehdr hdr64;
|
||||
} elf_obj_t;
|
||||
|
||||
/*
 * Validate the ELF identification bytes of elf->buf and load the file
 * header into elf->hdr32 or elf->hdr64, setting elf->bits and elf->le_data.
 *
 * Only little-endian ELFCLASS32 / ELFCLASS64 images are accepted.
 *
 * Returns 0 on success, 1 after logging a diagnostic otherwise.
 */
int parse_elf_header(elf_obj_t *elf) {
  int magic_ok, class_ok, data_ok;

  /* Verify ELF Magic numbers */
  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
  magic_ok = elf->e_ident[EI_MAG0] == ELFMAG0
             && elf->e_ident[EI_MAG1] == ELFMAG1
             && elf->e_ident[EI_MAG2] == ELFMAG2
             && elf->e_ident[EI_MAG3] == ELFMAG3;
  class_ok = elf->e_ident[EI_CLASS] == ELFCLASS32
             || elf->e_ident[EI_CLASS] == ELFCLASS64;
  data_ok = elf->e_ident[EI_DATA] == ELFDATA2LSB;

  if (!(magic_ok && class_ok && data_ok)) goto bail;

  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;

  /* Read in relevant values */
  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
    elf->bits = 32;
    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);

    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
    elf->bits = 64;
    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);

    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
  }

  return 0;
bail:
  /* Newline added so the caller's own diagnostic starts on a fresh line. */
  log_msg("Failed to parse ELF file header\n");
  return 1;
}
|
||||
|
||||
/*
 * Read section header number idx into *hdr32 (when non-NULL) or else into
 * *hdr64.
 *
 * elf    parsed ELF object; the matching hdr32/hdr64 file header must
 *        already be filled in.
 * idx    section header index; must be in [0, e_shnum).
 * hdr32  destination for a 32-bit header, or NULL.
 * hdr64  destination for a 64-bit header; used only when hdr32 is NULL.
 *
 * Returns 0 on success; 1 when idx is out of range, both destinations are
 * NULL, or the header would lie outside the file image.
 */
int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
  /* A negative index would pass the upper-bound tests below. */
  if (idx < 0)
    goto bail;

  if (hdr32) {
    if (idx >= elf->hdr32.e_shnum)
      goto bail;

    /* Offset computed in 64 bits: idx * e_shentsize can overflow an int. */
    COPY_STRUCT(hdr32, elf->buf,
                (uint64_t)elf->hdr32.e_shoff +
                (uint64_t)idx * elf->hdr32.e_shentsize,
                elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
  } else if (hdr64) {
    if (idx >= elf->hdr64.e_shnum)
      goto bail;

    COPY_STRUCT(hdr64, elf->buf,
                (uint64_t)elf->hdr64.e_shoff +
                (uint64_t)idx * elf->hdr64.e_shentsize,
                elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
  } else {
    /* Neither destination supplied: nothing to fill in. */
    goto bail;
  }

  return 0;
bail:
  return 1;
}
|
||||
|
||||
/*
 * Look up the string at byte index idx of the string table stored in
 * section s_idx.
 *
 * Returns a pointer into elf->buf at the NUL-terminated string. When the
 * section cannot be read, the index is outside the table, the table is
 * outside the file image, or no terminator exists inside the table, a
 * diagnostic is logged and the empty string "" is returned.
 */
const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
  uint64_t tab_off, tab_size;
  const uint8_t *str;

  if (elf->bits == 32) {
    Elf32_Shdr shdr;

    if (parse_elf_section(elf, s_idx, &shdr, NULL))
      goto fail;

    tab_off = shdr.sh_offset;
    tab_size = shdr.sh_size;
  } else { /* if (elf->bits == 64) */
    Elf64_Shdr shdr;

    if (parse_elf_section(elf, s_idx, NULL, &shdr))
      goto fail;

    tab_off = shdr.sh_offset;
    tab_size = shdr.sh_size;
  }

  /* The index must fall inside the table ... */
  if (idx < 0 || (uint64_t)idx >= tab_size)
    goto fail;

  /* ... the table inside the file image ... */
  if (tab_off > elf->sz || tab_size > elf->sz - tab_off)
    goto fail;

  /* ... and the string must end before the table does. */
  str = elf->buf + (size_t)tab_off + idx;
  if (memchr(str, '\0', (size_t)(tab_size - (uint64_t)idx)) == NULL)
    goto fail;

  return (const char *)str;

fail:
  log_msg("Failed to parse ELF string table: section %d, index %d\n",
          s_idx, idx);
  return "";
}
|
||||
|
||||
/*
 * Read one symbol table entry located ofst bytes into the file image.
 *
 * sym32  destination for a 32-bit entry, or NULL.
 * sym64  destination for a 64-bit entry; used only when sym32 is NULL.
 *
 * Returns 0 on success; 1 when both destinations are NULL or the entry
 * would lie outside the file image.
 */
int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
  if (sym32) {
    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
  } else if (sym64) {
    COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
    ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
  } else {
    /* Neither destination supplied: nothing to fill in. */
    goto bail;
  }
  return 0;
bail:
  return 1;
}
|
||||
|
||||
int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
|
||||
elf_obj_t elf;
|
||||
unsigned int ofst;
|
||||
int i;
|
||||
Elf32_Off strtab_off32;
|
||||
Elf64_Off strtab_off64; /* save String Table offset for later use */
|
||||
|
||||
memset(&elf, 0, sizeof(elf));
|
||||
elf.buf = buf;
|
||||
elf.sz = sz;
|
||||
|
||||
/* Parse Header */
|
||||
if (parse_elf_header(&elf))
|
||||
goto bail;
|
||||
|
||||
if (elf.bits == 32) {
|
||||
Elf32_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr32.e_shnum; i++) {
|
||||
parse_elf_section(&elf, i, &shdr, NULL);
|
||||
|
||||
if (shdr.sh_type == SHT_STRTAB) {
|
||||
char strtsb_name[128];
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab"))) {
|
||||
/* log_msg("found section: %s\n", strtsb_name); */
|
||||
strtab_off32 = shdr.sh_offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { /* if (elf.bits == 64) */
|
||||
Elf64_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr64.e_shnum; i++) {
|
||||
parse_elf_section(&elf, i, NULL, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_STRTAB) {
|
||||
char strtsb_name[128];
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab"))) {
|
||||
/* log_msg("found section: %s\n", strtsb_name); */
|
||||
strtab_off64 = shdr.sh_offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse all Symbol Tables */
|
||||
if (elf.bits == 32) {
|
||||
Elf32_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr32.e_shnum; i++) {
|
||||
parse_elf_section(&elf, i, &shdr, NULL);
|
||||
|
||||
if (shdr.sh_type == SHT_SYMTAB) {
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize) {
|
||||
Elf32_Sym sym;
|
||||
|
||||
parse_elf_symbol(&elf, ofst, &sym, NULL);
|
||||
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
/* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
*/
|
||||
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4) {
|
||||
Elf32_Shdr dhdr;
|
||||
int val = 0;
|
||||
char section_name[128];
|
||||
|
||||
parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
|
||||
|
||||
/* For explanition - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
|
||||
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
|
||||
|
||||
if (strcmp(section_name, ".bss")) {
|
||||
if (sizeof(val) != sym.st_size) {
|
||||
/* The target value is declared as an int in
|
||||
* *_asm_*_offsets.c, which is 4 bytes on all
|
||||
* targets we currently use. Complain loudly if
|
||||
* this is not true.
|
||||
*/
|
||||
log_msg("Symbol size is wrong\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sym.st_size);
|
||||
}
|
||||
|
||||
if (!elf.le_data) {
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_C_HEADER:
|
||||
printf("#define %-40s %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { /* if (elf.bits == 64) */
|
||||
Elf64_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr64.e_shnum; i++) {
|
||||
parse_elf_section(&elf, i, NULL, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_SYMTAB) {
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize) {
|
||||
Elf64_Sym sym;
|
||||
|
||||
parse_elf_symbol(&elf, ofst, NULL, &sym);
|
||||
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
/* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
*/
|
||||
|
||||
if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4) {
|
||||
Elf64_Shdr dhdr;
|
||||
int val = 0;
|
||||
char section_name[128];
|
||||
|
||||
parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
|
||||
|
||||
/* For explanition - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
|
||||
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
|
||||
|
||||
if ((strcmp(section_name, ".bss"))) {
|
||||
if (sizeof(val) != sym.st_size) {
|
||||
/* The target value is declared as an int in
|
||||
* *_asm_*_offsets.c, which is 4 bytes on all
|
||||
* targets we currently use. Complain loudly if
|
||||
* this is not true.
|
||||
*/
|
||||
log_msg("Symbol size is wrong\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sym.st_size);
|
||||
}
|
||||
|
||||
if (!elf.le_data) {
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == OUTPUT_FMT_RVDS)
|
||||
printf(" END\n");
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* defined(__GNUC__) && __GNUC__ */
|
||||
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
|
||||
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
||||
for reference.
|
||||
*/
|
||||
/*
 * Read a little-endian 32-bit / 16-bit value from the byte pointer x.
 * Each byte is widened to an unsigned type before shifting so that a top
 * byte >= 0x80 does not overflow a signed int. x is evaluated several times.
 */
#define get_le32(x) ((uint32_t)(x)[0] | (uint32_t)(x)[1] << 8 | \
                     (uint32_t)(x)[2] << 16 | (uint32_t)(x)[3] << 24)
#define get_le16(x) ((unsigned int)(x)[0] | (unsigned int)(x)[1] << 8)
|
||||
|
||||
/*
 * Parse a COFF object file held in memory and print every external symbol
 * (storage class 2, positive section number) as a "name EQU value" line,
 * followed by an END line.
 *
 * buf  start of the file image.
 * sz   size of the image in bytes; every read is checked against it.
 *
 * Returns 0 on success, 1 after logging a diagnostic on failure.
 */
int parse_coff(uint8_t *buf, size_t sz) {
  unsigned int nsections, symtab_ptr, symtab_sz;
  uint64_t strtab_ptr;
  unsigned int sectionrawdata_ptr = 0;
  int have_rdata = 0;
  unsigned int i;
  uint8_t *ptr;
  int res = 1;

  /* This array holds all section names in their correct order. It is used
   * to check if the symbol is in .bss or .rdata section.
   */
  char **sectionlist = NULL;

  /* The fixed-size COFF file header is 20 bytes. */
  if (sz < 20) {
    log_msg("File too small to contain a COFF header\n");
    return 1;
  }

  nsections = get_le16(buf + 2);
  symtab_ptr = get_le32(buf + 8);
  symtab_sz = get_le32(buf + 12);

  if (nsections > 96) {
    log_msg("Too many sections\n");
    return 1;
  }

  /* Section headers (40 bytes each) and symbol records (18 bytes each)
   * must lie entirely inside the file.
   */
  if ((uint64_t)20 + (uint64_t)nsections * 40 > sz) {
    log_msg("Section headers extend past the end of the file\n");
    return 1;
  }

  strtab_ptr = (uint64_t)symtab_ptr + (uint64_t)symtab_sz * 18;

  if (strtab_ptr > sz) {
    log_msg("Symbol table extends past the end of the file\n");
    return 1;
  }

  /* calloc so the cleanup path can free entries that were never filled. */
  sectionlist = calloc(nsections ? nsections : 1, sizeof(*sectionlist));

  if (sectionlist == NULL) {
    log_msg("Allocating first level of section list failed\n");
    return 1;
  }

  /*
  The size of optional header is always zero for an obj file. So, the section header
  follows the file header immediately.
  */
  ptr = buf + 20;  /* section header */

  for (i = 0; i < nsections; i++) {
    char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};

    /* The name field is 8 bytes and not necessarily NUL-terminated. */
    memcpy(sectionname, ptr, 8);

    sectionlist[i] = malloc(strlen(sectionname) + 1);

    if (sectionlist[i] == NULL) {
      log_msg("Allocating storage for %s failed\n", sectionname);
      goto done;
    }
    strcpy(sectionlist[i], sectionname);

    /* check if it's .rdata and is not a COMDAT section. */
    if (!strcmp(sectionname, ".rdata") &&
        (get_le32(ptr + 36) & 0x1000) == 0) {
      sectionrawdata_ptr = get_le32(ptr + 20);
      have_rdata = 1;
    }

    ptr += 40;
  }

  /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
     zero offset in .bss section. So, if the data is in .bss section, set offset=0.
     Note from Wiki: In an object module compiled from C, the bss section contains
     the local variables (but not functions) that were declared with the static keyword,
     except for those with non-zero initial values. (In C, static variables are initialized
     to zero by default.) It also contains the non-local (both extern and static) variables
     that are also initialized to zero (either explicitly or by default).
     */
  /* COFF symbol table:
     offset      field
     0           Name(*)
     8           Value
     12          SectionNumber
     14          Type
     16          StorageClass
     17          NumberOfAuxSymbols
     */
  ptr = buf + symtab_ptr;

  for (i = 0; i < symtab_sz; i++, ptr += 18) {
    int16_t section = get_le16(ptr + 12);  /* section number */
    char shortname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
    const char *name;
    uint32_t symoffset;

    if (!(section > 0 && ptr[16] == 2))
      continue;

    if ((unsigned int)section > nsections) {
      log_msg("Symbol refers to nonexistent section %d\n", (int)section);
      goto done;
    }

    if (get_le32(ptr)) {
      /* Short name stored inline in the record. */
      memcpy(shortname, ptr, 8);
      name = shortname;
    } else {
      /* Long name: offset into the string table that follows the symbols. */
      uint64_t name_off = strtab_ptr + (uint32_t)get_le32(ptr + 4);

      if (name_off >= sz ||
          memchr(buf + name_off, '\0', (size_t)(sz - name_off)) == NULL) {
        log_msg("Symbol name extends past the end of the file\n");
        goto done;
      }
      name = (const char *)(buf + name_off);
    }

    /* The 64bit Windows compiler doesn't prefix with an _.
     * Check what's there, and bump if necessary
     */
    if (name[0] == '_')
      name++;

    if (!(strcmp(sectionlist[section - 1], ".bss"))) {
      symoffset = 0;
    } else {
      uint64_t val_off = (uint64_t)sectionrawdata_ptr + (uint32_t)get_le32(ptr + 8);

      if (!have_rdata || val_off + 4 > sz) {
        log_msg("Symbol value cannot be read from .rdata\n");
        goto done;
      }
      symoffset = get_le32(buf + val_off);
    }

    printf("%-40s EQU %5d\n", name, (int)symoffset);
  }

  printf(" END\n");
  res = 0;

done:
  for (i = 0; i < nsections; i++) {
    free(sectionlist[i]);
  }

  free(sectionlist);

  return res;
}
|
||||
#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
output_fmt_t mode = OUTPUT_FMT_PLAIN;
|
||||
const char *f;
|
||||
uint8_t *file_buf;
|
||||
int res;
|
||||
FILE *fp;
|
||||
long int file_size;
|
||||
|
||||
if (argc < 2 || argc > 3) {
|
||||
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
|
||||
fprintf(stderr, " <obj file>\tobject file to parse\n");
|
||||
fprintf(stderr, "Output Formats:\n");
|
||||
fprintf(stderr, " gas - compatible with GNU assembler\n");
|
||||
fprintf(stderr, " rvds - compatible with armasm\n");
|
||||
fprintf(stderr, " cheader - c/c++ header file\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
f = argv[2];
|
||||
|
||||
if (!strcmp(argv[1], "rvds"))
|
||||
mode = OUTPUT_FMT_RVDS;
|
||||
else if (!strcmp(argv[1], "gas"))
|
||||
mode = OUTPUT_FMT_GAS;
|
||||
else if (!strcmp(argv[1], "cheader"))
|
||||
mode = OUTPUT_FMT_C_HEADER;
|
||||
else
|
||||
f = argv[1];
|
||||
|
||||
fp = fopen(f, "rb");
|
||||
|
||||
if (!fp) {
|
||||
perror("Unable to open file");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (fseek(fp, 0, SEEK_END)) {
|
||||
perror("stat");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
file_size = ftell(fp);
|
||||
file_buf = malloc(file_size);
|
||||
|
||||
if (!file_buf) {
|
||||
perror("malloc");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
rewind(fp);
|
||||
|
||||
if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
|
||||
perror("read");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (fclose(fp)) {
|
||||
perror("close");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
#if defined(__MACH__)
|
||||
res = parse_macho(file_buf, file_size, mode);
|
||||
#elif defined(__ELF__)
|
||||
res = parse_elf(file_buf, file_size, mode);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
|
||||
res = parse_coff(file_buf, file_size);
|
||||
#endif
|
||||
|
||||
free(file_buf);
|
||||
|
||||
if (!res)
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
bail:
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
From: Jacek Caban <jacek@codeweavers.com>
|
||||
Don't use pthread for libvpx in mingw builds.
|
||||
|
||||
|
||||
diff --git a/media/libvpx/vpx_config_x86-win32-gcc.h b/media/libvpx/vpx_config_x86-win32-gcc.h
|
||||
index 5bc3efb..e60f84d 100644
|
||||
--- a/media/libvpx/vpx_config_x86-win32-gcc.h
|
||||
+++ b/media/libvpx/vpx_config_x86-win32-gcc.h
|
||||
@@ -32,7 +32,8 @@
|
||||
#define HAVE_AVX2 1
|
||||
#define HAVE_VPX_PORTS 1
|
||||
#define HAVE_STDINT_H 1
|
||||
-#define HAVE_PTHREAD_H 1
|
||||
+#undef HAVE_PTHREAD_H
|
||||
+#define HAVE_PTHREAD_H 0
|
||||
#define HAVE_SYS_MMAN_H 1
|
||||
#define HAVE_UNISTD_H 1
|
||||
#define CONFIG_DEPENDENCY_TRACKING 1
|
||||
diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.h b/media/libvpx/vpx_config_x86_64-win64-gcc.h
|
||||
index 4ff4339..b056a0e 100644
|
||||
--- a/media/libvpx/vpx_config_x86_64-win64-gcc.h
|
||||
+++ b/media/libvpx/vpx_config_x86_64-win64-gcc.h
|
||||
@@ -32,7 +32,8 @@
|
||||
#define HAVE_AVX2 1
|
||||
#define HAVE_VPX_PORTS 1
|
||||
#define HAVE_STDINT_H 1
|
||||
-#define HAVE_PTHREAD_H 1
|
||||
+#undef HAVE_PTHREAD_H
|
||||
+#define HAVE_PTHREAD_H 0
|
||||
#define HAVE_SYS_MMAN_H 1
|
||||
#define HAVE_UNISTD_H 1
|
||||
#define CONFIG_DEPENDENCY_TRACKING 1
|
||||
@@ -1,24 +0,0 @@
|
||||
diff --git a/media/libvpx/vp9/common/vp9_systemdependent.h b/media/libvpx/vp9/common/vp9_systemdependent.h
|
||||
--- a/media/libvpx/vp9/common/vp9_systemdependent.h
|
||||
+++ b/media/libvpx/vp9/common/vp9_systemdependent.h
|
||||
@@ -12,17 +12,19 @@
|
||||
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <math.h> // the ceil() definition must precede intrin.h
|
||||
# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
|
||||
# include <intrin.h>
|
||||
# define USE_MSC_INTRIN
|
||||
# endif
|
||||
-# define snprintf _snprintf
|
||||
+# if _MSC_VER < 1900
|
||||
+# define snprintf _snprintf
|
||||
+# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
+#if !defined(VPX_DONT_DEFINE_STDINT_TYPES)
|
||||
+
|
||||
#if defined(_MSC_VER)
|
||||
#define VPX_FORCE_INLINE __forceinline
|
||||
#define VPX_INLINE __inline
|
||||
#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES)
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
@@ -56,6 +58,8 @@
|
||||
|
||||
#endif
|
||||
|
||||
+15
-2
@@ -36,7 +36,9 @@
|
||||
|
||||
%include "vpx_config.asm"
|
||||
|
||||
%ifndef program_name
|
||||
%define program_name vp9
|
||||
%endif
|
||||
|
||||
|
||||
%define UNIX64 0
|
||||
@@ -78,6 +80,9 @@
|
||||
%macro SECTION_RODATA 0-1 16
|
||||
%ifidn __OUTPUT_FORMAT__,macho64
|
||||
SECTION .text align=%1
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
SECTION .text align=%1
|
||||
fakegot:
|
||||
%elifidn __OUTPUT_FORMAT__,macho
|
||||
SECTION .text align=%1
|
||||
fakegot:
|
||||
@@ -617,9 +622,17 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
%elifidn __OUTPUT_FORMAT__,elf64
|
||||
global %1:function hidden
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
global %1:private_extern
|
||||
%ifdef __NASM_VER__
|
||||
global %1
|
||||
%else
|
||||
global %1:private_extern
|
||||
%endif
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
global %1:private_extern
|
||||
%ifdef __NASM_VER__
|
||||
global %1
|
||||
%else
|
||||
global %1:private_extern
|
||||
%endif
|
||||
%else
|
||||
global %1
|
||||
%endif
|
||||
|
||||
+100
-44
@@ -27,12 +27,13 @@ PLATFORMS= [
|
||||
|
||||
mk_files = [
|
||||
'vp8/vp8_common.mk',
|
||||
'vp8/vp8cx_arm.mk',
|
||||
'vp8/vp8cx.mk',
|
||||
'vp8/vp8dx.mk',
|
||||
'vp8/vp8cx_arm.mk',
|
||||
'vp9/vp9_common.mk',
|
||||
'vp9/vp9cx.mk',
|
||||
'vp9/vp9dx.mk',
|
||||
'vpx_dsp/vpx_dsp.mk',
|
||||
'vpx_mem/vpx_mem.mk',
|
||||
'vpx_ports/vpx_ports.mk',
|
||||
'vpx_scale/vpx_scale.mk',
|
||||
@@ -42,12 +43,14 @@ mk_files = [
|
||||
extensions = ['.asm', '.c', '.h']
|
||||
|
||||
MODULES = {
|
||||
'UNIFIED_SOURCES': [
|
||||
'SOURCES': [
|
||||
'API_DOC_SRCS-$(CONFIG_VP8_DECODER)',
|
||||
'API_DOC_SRCS-yes',
|
||||
'API_EXPORTS',
|
||||
'API_SRCS-$(CONFIG_VP8_DECODER)',
|
||||
'API_SRCS-yes',
|
||||
'DSP_SRCS-yes',
|
||||
'DSP_SRCS-yes+$(CONFIG_ENCODERS)',
|
||||
'MEM_SRCS-yes',
|
||||
'PORTS_SRCS-yes',
|
||||
'SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING)',
|
||||
@@ -77,6 +80,7 @@ MODULES = {
|
||||
'VP8_CX_SRCS-no',
|
||||
'VP8_CX_SRCS_REMOVE-no',
|
||||
'VP8_CX_SRCS_REMOVE-yes',
|
||||
'VP8_CX_SRCS_REMOVE-yes+$(CONFIG_REALTIME_ONLY)',
|
||||
'VP8_CX_SRCS-yes',
|
||||
'VP9_CX_EXPORTS',
|
||||
'VP9_CX_SRCS-no',
|
||||
@@ -85,35 +89,59 @@ MODULES = {
|
||||
'VP9_CX_SRCS-yes',
|
||||
],
|
||||
'X86_ASM': [
|
||||
'DSP_SRCS-$(HAVE_MMX)',
|
||||
'DSP_SRCS-$(HAVE_MMX)+$(CONFIG_ENCODERS)',
|
||||
'DSP_SRCS-$(HAVE_SSE2)',
|
||||
'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_ENCODERS)',
|
||||
'DSP_SRCS-$(HAVE_SSE3)+$(CONFIG_ENCODERS)',
|
||||
'DSP_SRCS-$(HAVE_SSE4_1)+$(CONFIG_ENCODERS)',
|
||||
'DSP_SRCS-$(HAVE_SSSE3)+$(CONFIG_ENCODERS)',
|
||||
'PORTS_SRCS-$(BUILD_LIBVPX)',
|
||||
'PORTS_SRCS-$(BUILD_LIBVPX)+$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'PORTS_SRCS-yes+$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_MMX)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_MMX)+$(CONFIG_POSTPROC)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSE2)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_POSTPROC)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSE3)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSE4_1)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSSE3)',
|
||||
'VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_MMX)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSE2)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSSE3)',
|
||||
'VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP8_CX_SRCS-$(HAVE_MMX)',
|
||||
'VP8_CX_SRCS-$(HAVE_SSE2)',
|
||||
'VP8_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_TEMPORAL_DENOISING)',
|
||||
'VP8_CX_SRCS-$(HAVE_SSE4_1)',
|
||||
'VP8_CX_SRCS-$(HAVE_SSSE3)',
|
||||
'VP8_CX_SRCS_REMOVE-$(HAVE_SSE2)',
|
||||
'VP8_CX_SRCS_REMOVE-$(HAVE_SSE2)+$(CONFIG_REALTIME_ONLY)',
|
||||
'VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_MMX)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSE2)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_USE_X86INC)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSSE3)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSSE3)+$(CONFIG_USE_X86INC)',
|
||||
'VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
|
||||
'VP9_CX_SRCS-$(HAVE_MMX)',
|
||||
'VP9_CX_SRCS-$(HAVE_MMX)+$(CONFIG_USE_X86INC)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE2)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_USE_X86INC)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE3)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE4_1)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSSE3)',
|
||||
],
|
||||
'X86-64_ASM': [
|
||||
'VP8_COMMON_SRCS-$(HAVE_SSE2)+$(ARCH_X86_64)',
|
||||
'VP8_CX_SRCS-$(ARCH_X86_64)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSSE3)+$(ARCH_X86_64)',
|
||||
'VP9_CX_SRCS-$(ARCH_X86_64)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSSE3)+$(ARCH_X86_64)',
|
||||
],
|
||||
'ARM_ASM': [
|
||||
'DSP_SRCS-$(HAVE_MEDIA)',
|
||||
'DSP_SRCS-$(HAVE_MEDIA)+$(CONFIG_ENCODERS)',
|
||||
'DSP_SRCS-$(HAVE_NEON)',
|
||||
'DSP_SRCS-$(HAVE_NEON)+$(CONFIG_ENCODERS)',
|
||||
'PORTS_SRCS-$(ARCH_ARM)',
|
||||
'SCALE_SRCS-$(HAVE_NEON)',
|
||||
'VP8_COMMON_SRCS-$(ARCH_ARM)',
|
||||
@@ -121,6 +149,7 @@ MODULES = {
|
||||
'VP8_COMMON_SRCS-$(HAVE_NEON)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_NEON)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_NEON_ASM)',
|
||||
'VP9_COMMON_SRCS-yes+$(HAVE_NEON_ASM)',
|
||||
'VP8_CX_SRCS-$(ARCH_ARM)',
|
||||
'VP8_CX_SRCS-$(HAVE_EDSP)',
|
||||
'VP8_CX_SRCS-$(HAVE_MEDIA)',
|
||||
@@ -132,6 +161,8 @@ MODULES = {
|
||||
'VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT)',
|
||||
],
|
||||
'AVX2': [
|
||||
'DSP_SRCS-$(HAVE_AVX2)',
|
||||
'DSP_SRCS-$(HAVE_AVX2)+$(CONFIG_ENCODERS)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_AVX2)',
|
||||
'VP9_CX_SRCS-$(HAVE_AVX2)',
|
||||
],
|
||||
@@ -140,28 +171,43 @@ MODULES = {
|
||||
],
|
||||
'VP9_POSTPROC': [
|
||||
'VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_POSTPROC)',
|
||||
]
|
||||
}
|
||||
|
||||
DISABLED_MODULES = [
|
||||
'API_SRCS-$(CONFIG_SPATIAL_SVC)',
|
||||
'API_SRCS-$(CONFIG_SPATIAL_SVC)+$(CONFIG_VP9_ENCODER)',
|
||||
'MEM_SRCS-$(CONFIG_MEM_MANAGER)',
|
||||
'MEM_SRCS-$(CONFIG_MEM_TRACKER)',
|
||||
'VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)',
|
||||
'VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)',
|
||||
'VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS)',
|
||||
'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)',
|
||||
'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)+$(CONFIG_VP9_POSTPROC)',
|
||||
'VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_TEMPORAL_DENOISING)',
|
||||
|
||||
# VP9_HIGHBITDEPTH
|
||||
'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
|
||||
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
|
||||
|
||||
# use asm implementations instead of intrinsics
|
||||
# neon exists as assembly and intrinsics implementations.
|
||||
# If both are available prefer assembly (HAVE_NEON_ASM)
|
||||
'VP9_COMMON_SRCS-yes+$(HAVE_NEON)',
|
||||
|
||||
# mips files are also ignored via ignored_folders
|
||||
'SCALE_SRCS-$(HAVE_DSPR2)',
|
||||
'VP8_COMMON_SRCS-$(HAVE_DSPR2)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_DSPR2)',
|
||||
'VP8_CX_SRCS_REMOVE-$(HAVE_EDSP)',
|
||||
'VP9_COMMON_SRCS-$(HAVE_MSA)',
|
||||
'VP9_CX_SRCS-$(HAVE_MSA)',
|
||||
]
|
||||
|
||||
libvpx_files = [
|
||||
'build/make/obj_int_extract.c',
|
||||
'build/make/ads2gas.pl',
|
||||
'build/make/thumb.pm',
|
||||
'LICENSE',
|
||||
@@ -195,6 +241,16 @@ ignore_folders = [
|
||||
'test/',
|
||||
'vpx_mem/memory_manager/',
|
||||
]
|
||||
|
||||
rename_files = {
|
||||
#avoid clash between vpx_dsp/x86 and vp8/common/x86
|
||||
'vp8/common/x86/variance_mmx.c': 'vp8/common/x86/vp8_variance_mmx.c',
|
||||
'vp8/common/x86/variance_sse2.c': 'vp8/common/x86/vp8_variance_sse2.c',
|
||||
'vp8/common/x86/variance_impl_mmx.asm': 'vp8/common/x86/vp8_variance_impl_mmx.asm',
|
||||
#avoid clash with common/arm/neon/vp9_avg_neon.c
|
||||
'vp9/encoder/arm/neon/vp9_avg_neon.c': 'vp9/encoder/arm/neon/vp9enc_avg_neon.c',
|
||||
}
|
||||
|
||||
files = {
|
||||
'EXPORTS': [
|
||||
'vpx_mem/include/vpx_mem_intrnl.h',
|
||||
@@ -217,12 +273,9 @@ files = {
|
||||
],
|
||||
'X86-64_ASM': [
|
||||
'third_party/x86inc/x86inc.asm',
|
||||
'vp8/common/x86/loopfilter_block_sse2_x86_64.asm',
|
||||
'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
|
||||
],
|
||||
'SOURCES': [
|
||||
'vp8/common/rtcd.c',
|
||||
'vp8/common/sad_c.c',
|
||||
'vp8/encoder/bitstream.c',
|
||||
'vp8/encoder/onyx_if.c',
|
||||
'vp8/vp8_dx_iface.c',
|
||||
@@ -249,26 +302,18 @@ files = {
|
||||
'vp9/common/vp9_scale.c',
|
||||
'vp9/common/vp9_scan.c',
|
||||
'vp9/common/vp9_seg_common.c',
|
||||
'vp9/common/vp9_thread.c',
|
||||
'vp9/common/vp9_tile_common.c',
|
||||
'vp9/decoder/vp9_decodeframe.c',
|
||||
'vp9/decoder/vp9_decodemv.c',
|
||||
'vp9/decoder/vp9_decoder.c',
|
||||
'vp9/decoder/vp9_detokenize.c',
|
||||
'vp9/decoder/vp9_dsubexp.c',
|
||||
'vp9/decoder/vp9_dthread.c',
|
||||
'vp9/decoder/vp9_reader.c',
|
||||
'vp9/encoder/vp9_bitstream.c',
|
||||
'vp9/encoder/vp9_aq_complexity.c',
|
||||
'vp9/encoder/vp9_aq_cyclicrefresh.c',
|
||||
'vp9/encoder/vp9_aq_variance.c',
|
||||
'vp9/encoder/vp9_context_tree.c',
|
||||
'vp9/encoder/vp9_cost.c',
|
||||
'vp9/encoder/vp9_dct.c',
|
||||
'vp9/encoder/vp9_encodeframe.c',
|
||||
'vp9/encoder/vp9_encodemb.c',
|
||||
'vp9/encoder/vp9_encodemv.c',
|
||||
'vp9/encoder/vp9_encoder.c',
|
||||
'vp9/encoder/vp9_extend.c',
|
||||
'vp9/encoder/vp9_firstpass.c',
|
||||
'vp9/encoder/vp9_lookahead.c',
|
||||
@@ -278,23 +323,17 @@ files = {
|
||||
'vp9/encoder/vp9_pickmode.c',
|
||||
'vp9/encoder/vp9_quantize.c',
|
||||
'vp9/encoder/vp9_ratectrl.c',
|
||||
'vp9/encoder/vp9_rd.c',
|
||||
'vp9/encoder/vp9_rdopt.c',
|
||||
'vp9/encoder/vp9_resize.c',
|
||||
'vp9/encoder/vp9_sad.c',
|
||||
'vp9/encoder/vp9_segmentation.c',
|
||||
'vp9/encoder/vp9_speed_features.c',
|
||||
'vp9/encoder/vp9_subexp.c',
|
||||
'vp9/encoder/vp9_svc_layercontext.c',
|
||||
'vp9/encoder/vp9_temporal_filter.c',
|
||||
'vp9/encoder/vp9_tokenize.c',
|
||||
'vp9/encoder/vp9_treewriter.c',
|
||||
'vp9/encoder/vp9_variance.c',
|
||||
'vp9/encoder/vp9_write_bit_buffer.c',
|
||||
'vp9/encoder/vp9_writer.c',
|
||||
'vp9/vp9_cx_iface.c',
|
||||
'vp9/vp9_dx_iface.c',
|
||||
'vpx/src/svc_encodeframe.c',
|
||||
'vpx/src/vpx_encoder.c',
|
||||
'vpx_mem/vpx_mem.c',
|
||||
'vpx_scale/vpx_scale_rtcd.c',
|
||||
@@ -307,9 +346,9 @@ manual = [
|
||||
# special case in moz.build
|
||||
'vp8/encoder/boolhuff.c',
|
||||
|
||||
# 64bit only
|
||||
'vp8/common/x86/loopfilter_block_sse2_x86_64.asm',
|
||||
'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
|
||||
# These 64-bit only files end up in X86_ASM. Filter them out.
|
||||
'vp8/common/x86/loopfilter_block_sse2.asm',
|
||||
'vp9/encoder/x86/vp9_quantize_ssse3.asm',
|
||||
|
||||
# offsets are special cased in Makefile.in
|
||||
'vp8/encoder/vp8_asm_enc_offsets.c',
|
||||
@@ -333,27 +372,28 @@ platform_files = [
|
||||
'vpx_config.asm',
|
||||
'vpx_config.h',
|
||||
'vpx_scale_rtcd.h',
|
||||
'vpx_dsp_rtcd.h',
|
||||
]
|
||||
|
||||
def prepare_upstream(prefix, commit=None):
|
||||
upstream_url = 'https://chromium.googlesource.com/webm/libvpx'
|
||||
if os.path.exists(prefix):
|
||||
print "Please remove '%s' folder before running %s" % (prefix, sys.argv[0])
|
||||
sys.exit(1)
|
||||
|
||||
upstream_url = 'https://gerrit.chromium.org/gerrit/webm/libvpx'
|
||||
subprocess.call(['git', 'clone', upstream_url, prefix])
|
||||
if commit:
|
||||
os.chdir(prefix)
|
||||
subprocess.call(['git', 'fetch', 'origin'])
|
||||
else:
|
||||
subprocess.call(['git', 'clone', upstream_url, prefix])
|
||||
os.chdir(prefix)
|
||||
if commit:
|
||||
subprocess.call(['git', 'checkout', commit])
|
||||
else:
|
||||
os.chdir(prefix)
|
||||
p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
|
||||
stdout, stderr = p.communicate()
|
||||
commit = stdout.strip()
|
||||
|
||||
for target in PLATFORMS:
|
||||
target_objdir = os.path.join(prefix, 'objdir', target)
|
||||
os.makedirs(target_objdir)
|
||||
if not os.path.exists(target_objdir):
|
||||
os.makedirs(target_objdir)
|
||||
os.chdir(target_objdir)
|
||||
configure = ['../../configure', '--target=%s' % target,
|
||||
'--disable-examples', '--disable-install-docs',
|
||||
@@ -372,10 +412,14 @@ def prepare_upstream(prefix, commit=None):
|
||||
|
||||
if target == 'armv7-android-gcc':
|
||||
configure += ['--sdk-path=%s' % ndk_path]
|
||||
|
||||
print "\n" + target_objdir
|
||||
print " ".join(configure)
|
||||
sys.stdout.flush()
|
||||
subprocess.call(configure)
|
||||
make_targets = [f for f in platform_files if not os.path.exists(f)]
|
||||
if make_targets:
|
||||
print " ".join(['make'] + make_targets)
|
||||
sys.stdout.flush()
|
||||
subprocess.call(['make'] + make_targets)
|
||||
for f in make_targets:
|
||||
if not os.path.exists(f):
|
||||
@@ -386,7 +430,7 @@ def prepare_upstream(prefix, commit=None):
|
||||
return commit
|
||||
|
||||
def cleanup_upstream():
|
||||
shutil.rmtree(os.path.join(base, 'upstream'))
|
||||
shutil.rmtree(os.path.join(base, 'upstream/objdir'))
|
||||
|
||||
def get_module(key):
|
||||
for module in MODULES:
|
||||
@@ -412,10 +456,17 @@ def get_sources(prefix):
|
||||
for mk in mk_files:
|
||||
with open(os.path.join(prefix, mk)) as f:
|
||||
base = os.path.dirname(mk)
|
||||
extra = ''
|
||||
for l in f:
|
||||
m = re.compile('ifeq +\((.*?\)), *yes\)').findall(l)
|
||||
if m:
|
||||
extra = '+' + m[0]
|
||||
if extra and l.startswith('else') or l.startswith('endif'):
|
||||
extra = ''
|
||||
if '+=' in l:
|
||||
l = l.split('+=')
|
||||
key = l[0].strip()
|
||||
key += extra
|
||||
value = l[1].strip().replace('$(ASM)', '.asm')
|
||||
value = os.path.join(base, value)
|
||||
if not key.startswith('#') and os.path.splitext(value)[-1] in extensions:
|
||||
@@ -425,6 +476,7 @@ def get_sources(prefix):
|
||||
|
||||
for key in source:
|
||||
for f in source[key]:
|
||||
f = rename_files.get(f, f)
|
||||
if key.endswith('EXPORTS') and f.endswith('.h'):
|
||||
files['EXPORTS'].append(f)
|
||||
if os.path.splitext(f)[-1] in ('.c', '.asm') and not f in manual:
|
||||
@@ -443,8 +495,6 @@ def get_sources(prefix):
|
||||
t = unknown[key]
|
||||
t.append(f)
|
||||
|
||||
files['UNIFIED_SOURCES'] = [f for f in files['UNIFIED_SOURCES'] if f not in files['SOURCES']]
|
||||
|
||||
for key in files:
|
||||
files[key] = list(sorted(set(files[key])))
|
||||
|
||||
@@ -495,6 +545,7 @@ def update_and_remove_files(prefix, libvpx_files, files):
|
||||
if fdir and not os.path.exists(fdir):
|
||||
os.makedirs(fdir)
|
||||
s = os.path.join(prefix, f)
|
||||
f = rename_files.get(f, f)
|
||||
if is_new(f, s):
|
||||
if first:
|
||||
print "Copy files:"
|
||||
@@ -521,7 +572,11 @@ def update_and_remove_files(prefix, libvpx_files, files):
|
||||
copy(s, f)
|
||||
|
||||
# Remove unknown files from tree
|
||||
removed_files = [f for f in current_files if f not in libvpx_files]
|
||||
removed_files = [f for f in current_files if f not in libvpx_files and f not in rename_files.values()]
|
||||
for f in rename_files:
|
||||
if os.path.exists(f) and os.path.exists(rename_files[f]) and not f in removed_files:
|
||||
removed_files.append(f)
|
||||
|
||||
if removed_files:
|
||||
print "Remove files:"
|
||||
for f in removed_files:
|
||||
@@ -531,12 +586,13 @@ def update_and_remove_files(prefix, libvpx_files, files):
|
||||
def apply_patches():
|
||||
# Patch to permit vpx users to specify their own <stdint.h> types.
|
||||
os.system("patch -p0 < stdint.patch")
|
||||
# Patch to allow older versions of Apple's clang to build libvpx.
|
||||
os.system("patch -p3 < apple-clang.patch")
|
||||
# Patch to allow MSVC 2015 to compile libvpx
|
||||
os.system("patch -p3 < msvc2015.patch")
|
||||
# Patch to fix a crash caused by MSVC 2013
|
||||
os.system("patch -p3 < bug1137614.patch")
|
||||
# Bug 1176730 - Don't use pthread for libvpx in mingw builds.
|
||||
os.system("patch -p3 < disable_pthread_on_mingw.patch")
|
||||
# Cherry pick https://chromium-review.googlesource.com/#/c/276889/
|
||||
# to fix crash on 32bit
|
||||
os.system("patch -p1 < vp9_filter_restore_aligment.patch")
|
||||
|
||||
def update_readme(commit):
|
||||
with open('README_MOZILLA') as f:
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "alloccommon.h"
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "onyxc_int.h"
|
||||
@@ -103,9 +104,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->post_proc_buffer_int_used = 0;
|
||||
vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
|
||||
/* Allocate buffer to store post-processing filter coefficients.
|
||||
*
|
||||
@@ -176,7 +177,7 @@ void vp8_create_common(VP8_COMMON *oci)
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
|
||||
@@ -165,7 +165,7 @@ vp8_dequant_idct_loop2_v6
|
||||
str r1, [r2], r12 ; store output to dst
|
||||
bne vp8_dequant_idct_loop2_v6
|
||||
|
||||
; vpx_memset
|
||||
; memset
|
||||
sub r0, r0, #32
|
||||
add sp, sp, #4
|
||||
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance16x16_armv6| PROC
|
||||
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r8, #0 ; initialize sum = 0
|
||||
mov r11, #0 ; initialize sse = 0
|
||||
mov r12, #16 ; set loop counter to 16 (=block height)
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r4, [r0, #0] ; load 4 src pixels
|
||||
ldr r5, [r2, #0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
adds r8, r8, r4 ; add positive differences to sum
|
||||
subs r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r4, [r0, #4] ; load 4 src pixels
|
||||
ldr r5, [r2, #4] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 3rd 4 pixels
|
||||
ldr r4, [r0, #8] ; load 4 src pixels
|
||||
ldr r5, [r2, #8] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 4th 4 pixels
|
||||
ldr r4, [r0, #12] ; load 4 src pixels
|
||||
ldr r5, [r2, #12] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set dst_ptr to next row
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r6, [sp, #40] ; get address of sse
|
||||
mul r0, r8, r8 ; sum * sum
|
||||
str r11, [r6] ; store sse
|
||||
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||||
|
||||
ldmfd sp!, {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance8x8_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance8x8_armv6| PROC
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r12, #8 ; set loop counter to 8 (=block height)
|
||||
mov r4, #0 ; initialize sum = 0
|
||||
mov r5, #0 ; initialize sse = 0
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r6, [r0, #0x0] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r6, [r0, #0x4] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x4] ; load 4 ref pixels
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set dst_ptr to next row
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
subs r12, r12, #1 ; next row
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r8, [sp, #32] ; get address of sse
|
||||
mul r1, r4, r4 ; sum * sum
|
||||
str r5, [r8] ; store sse
|
||||
sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -99,7 +99,7 @@ void vp8_sixtap_predict4x4_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
@@ -147,7 +147,7 @@ void vp8_sixtap_predict8x8_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
@@ -189,7 +189,7 @@ void vp8_sixtap_predict16x16_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
unsigned int vp8_sad8x8_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride) {
|
||||
uint8x8_t d0, d8;
|
||||
uint16x8_t q12;
|
||||
uint32x4_t q1;
|
||||
uint64x2_t q3;
|
||||
uint32x2_t d5;
|
||||
int i;
|
||||
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabdl_u8(d0, d8);
|
||||
|
||||
for (i = 0; i < 7; i++) {
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabal_u8(q12, d0, d8);
|
||||
}
|
||||
|
||||
q1 = vpaddlq_u16(q12);
|
||||
q3 = vpaddlq_u32(q1);
|
||||
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q3)));
|
||||
|
||||
return vget_lane_u32(d5, 0);
|
||||
}
|
||||
|
||||
unsigned int vp8_sad8x16_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride) {
|
||||
uint8x8_t d0, d8;
|
||||
uint16x8_t q12;
|
||||
uint32x4_t q1;
|
||||
uint64x2_t q3;
|
||||
uint32x2_t d5;
|
||||
int i;
|
||||
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabdl_u8(d0, d8);
|
||||
|
||||
for (i = 0; i < 15; i++) {
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabal_u8(q12, d0, d8);
|
||||
}
|
||||
|
||||
q1 = vpaddlq_u16(q12);
|
||||
q3 = vpaddlq_u32(q1);
|
||||
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q3)));
|
||||
|
||||
return vget_lane_u32(d5, 0);
|
||||
}
|
||||
|
||||
unsigned int vp8_sad4x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride) {
|
||||
uint8x8_t d0, d8;
|
||||
uint16x8_t q12;
|
||||
uint32x2_t d1;
|
||||
uint64x1_t d3;
|
||||
int i;
|
||||
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabdl_u8(d0, d8);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
d0 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += ref_stride;
|
||||
q12 = vabal_u8(q12, d0, d8);
|
||||
}
|
||||
|
||||
d1 = vpaddl_u16(vget_low_u16(q12));
|
||||
d3 = vpaddl_u32(d1);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
|
||||
}
|
||||
|
||||
/* Sum of absolute differences between a 16x16 block at src_ptr and the
 * 16x16 block at ref_ptr.
 *
 * src_stride / ref_stride are the distances, in bytes, between the starts
 * of consecutive rows in the respective buffers.
 * Returns the total of |src - ref| over all 16 * 16 samples.
 */
unsigned int vp8_sad16x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Separate running totals for columns 0-7 and columns 8-15. */
    uint16x8_t acc_left = vdupq_n_u16(0);
    uint16x8_t acc_right = vdupq_n_u16(0);
    uint16x8_t acc_all;
    uint32x4_t sums_32;
    uint64x2_t sums_64;
    uint32x2_t total;
    int row;

    for (row = 0; row < 16; ++row) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        acc_left = vabal_u8(acc_left,
                            vget_low_u8(src_row), vget_low_u8(ref_row));
        acc_right = vabal_u8(acc_right,
                             vget_high_u8(src_row), vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Merge both halves, then fold the lanes down to a single value. */
    acc_all = vaddq_u16(acc_left, acc_right);
    sums_32 = vpaddlq_u16(acc_all);
    sums_64 = vpaddlq_u32(sums_32);
    total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sums_64)),
                     vreinterpret_u32_u64(vget_high_u64(sums_64)));

    return vget_lane_u32(total, 0);
}
|
||||
|
||||
/* Sum of absolute differences between a 16-wide, 8-row block at src_ptr
 * and the block of the same size at ref_ptr.
 *
 * src_stride / ref_stride are the distances, in bytes, between the starts
 * of consecutive rows in the respective buffers.
 * Returns the total of |src - ref| over all 16 * 8 samples.
 */
unsigned int vp8_sad16x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Separate running totals for columns 0-7 and columns 8-15. */
    uint16x8_t acc_left = vdupq_n_u16(0);
    uint16x8_t acc_right = vdupq_n_u16(0);
    uint16x8_t acc_all;
    uint32x4_t sums_32;
    uint64x2_t sums_64;
    uint32x2_t total;
    int row;

    for (row = 0; row < 8; ++row) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        acc_left = vabal_u8(acc_left,
                            vget_low_u8(src_row), vget_low_u8(ref_row));
        acc_right = vabal_u8(acc_right,
                             vget_high_u8(src_row), vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Merge both halves, then fold the lanes down to a single value. */
    acc_all = vaddq_u16(acc_left, acc_right);
    sums_32 = vpaddlq_u16(acc_all);
    sums_64 = vpaddlq_u32(sums_32);
    total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sums_64)),
                     vreinterpret_u32_u64(vget_high_u64(sums_64)));

    return vget_lane_u32(total, 0);
}
|
||||
@@ -1,320 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
/* Variance of the difference between a 16x16 source block and a 16x16
 * reference block.
 *
 * source_stride / recon_stride are the distances, in bytes, between the
 * starts of consecutive rows of src_ptr / ref_ptr.
 * The sum of squared differences is written to *sse.
 * Returns *sse minus (sum of differences squared, shifted right by 8).
 */
unsigned int vp8_variance16x16_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int32x4_t diff_sum_acc = vdupq_n_s32(0);  /* running sum of differences */
    int32x4_t sq_acc_lo = vdupq_n_s32(0);     /* squares, low vector halves  */
    int32x4_t sq_acc_hi = vdupq_n_s32(0);     /* squares, high vector halves */
    int64x2_t sum_pairs, sq_pairs, sum_squared;
    int64x1_t sum_total, sse_total;
    uint32x2_t mean_term, result;
    int row_pair, k;

    /* Two rows are consumed per iteration. */
    for (row_pair = 0; row_pair < 8; ++row_pair) {
        uint8x16_t src_row0, src_row1, ref_row0, ref_row1;
        int16x8_t diff[4];

        src_row0 = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        src_row1 = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        __builtin_prefetch(src_ptr);

        ref_row0 = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        ref_row1 = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        __builtin_prefetch(ref_ptr);

        /* The widening subtract wraps modulo 2^16; viewing the result as
         * signed 16-bit yields the signed difference. */
        diff[0] = vreinterpretq_s16_u16(
            vsubl_u8(vget_low_u8(src_row0), vget_low_u8(ref_row0)));
        diff[1] = vreinterpretq_s16_u16(
            vsubl_u8(vget_high_u8(src_row0), vget_high_u8(ref_row0)));
        diff[2] = vreinterpretq_s16_u16(
            vsubl_u8(vget_low_u8(src_row1), vget_low_u8(ref_row1)));
        diff[3] = vreinterpretq_s16_u16(
            vsubl_u8(vget_high_u8(src_row1), vget_high_u8(ref_row1)));

        for (k = 0; k < 4; ++k) {
            const int16x4_t half_lo = vget_low_s16(diff[k]);
            const int16x4_t half_hi = vget_high_s16(diff[k]);

            diff_sum_acc = vpadalq_s16(diff_sum_acc, diff[k]);
            sq_acc_lo = vmlal_s16(sq_acc_lo, half_lo, half_lo);
            sq_acc_hi = vmlal_s16(sq_acc_hi, half_hi, half_hi);
        }
    }

    /* Reduce the per-lane accumulators to one sum and one SSE value. */
    sum_pairs = vpaddlq_s32(diff_sum_acc);
    sq_pairs = vpaddlq_s32(vaddq_s32(sq_acc_hi, sq_acc_lo));
    sum_total = vadd_s64(vget_low_s64(sum_pairs), vget_high_s64(sum_pairs));
    sse_total = vadd_s64(vget_low_s64(sq_pairs), vget_high_s64(sq_pairs));

    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(sse_total), 0);

    sum_squared = vmull_s32(vreinterpret_s32_s64(sum_total),
                            vreinterpret_s32_s64(sum_total));
    mean_term = vshr_n_u32(
        vreinterpret_u32_s64(vget_low_s64(sum_squared)), 8);
    result = vsub_u32(vreinterpret_u32_s64(sse_total), mean_term);

    return vget_lane_u32(result, 0);
}
|
||||
|
||||
/* Variance of the difference between a 16-wide, 8-row source block and a
 * reference block of the same size.
 *
 * source_stride / recon_stride are the distances, in bytes, between the
 * starts of consecutive rows of src_ptr / ref_ptr.
 * The sum of squared differences is written to *sse.
 * Returns *sse minus (sum of differences squared, shifted right by 7).
 */
unsigned int vp8_variance16x8_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int32x4_t diff_sum_acc = vdupq_n_s32(0);  /* running sum of differences */
    int32x4_t sq_acc_lo = vdupq_n_s32(0);     /* squares, low vector halves  */
    int32x4_t sq_acc_hi = vdupq_n_s32(0);     /* squares, high vector halves */
    int64x2_t sum_pairs, sq_pairs, sum_squared;
    int64x1_t sum_total, sse_total;
    uint32x2_t mean_term, result;
    int row_pair, k;

    /* Two rows are consumed per iteration. */
    for (row_pair = 0; row_pair < 4; ++row_pair) {
        uint8x16_t src_row0, src_row1, ref_row0, ref_row1;
        int16x8_t diff[4];

        src_row0 = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        src_row1 = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        __builtin_prefetch(src_ptr);

        ref_row0 = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        ref_row1 = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        __builtin_prefetch(ref_ptr);

        /* The widening subtract wraps modulo 2^16; viewing the result as
         * signed 16-bit yields the signed difference. */
        diff[0] = vreinterpretq_s16_u16(
            vsubl_u8(vget_low_u8(src_row0), vget_low_u8(ref_row0)));
        diff[1] = vreinterpretq_s16_u16(
            vsubl_u8(vget_high_u8(src_row0), vget_high_u8(ref_row0)));
        diff[2] = vreinterpretq_s16_u16(
            vsubl_u8(vget_low_u8(src_row1), vget_low_u8(ref_row1)));
        diff[3] = vreinterpretq_s16_u16(
            vsubl_u8(vget_high_u8(src_row1), vget_high_u8(ref_row1)));

        for (k = 0; k < 4; ++k) {
            const int16x4_t half_lo = vget_low_s16(diff[k]);
            const int16x4_t half_hi = vget_high_s16(diff[k]);

            diff_sum_acc = vpadalq_s16(diff_sum_acc, diff[k]);
            sq_acc_lo = vmlal_s16(sq_acc_lo, half_lo, half_lo);
            sq_acc_hi = vmlal_s16(sq_acc_hi, half_hi, half_hi);
        }
    }

    /* Reduce the per-lane accumulators to one sum and one SSE value. */
    sum_pairs = vpaddlq_s32(diff_sum_acc);
    sq_pairs = vpaddlq_s32(vaddq_s32(sq_acc_hi, sq_acc_lo));
    sum_total = vadd_s64(vget_low_s64(sum_pairs), vget_high_s64(sum_pairs));
    sse_total = vadd_s64(vget_low_s64(sq_pairs), vget_high_s64(sq_pairs));

    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(sse_total), 0);

    sum_squared = vmull_s32(vreinterpret_s32_s64(sum_total),
                            vreinterpret_s32_s64(sum_total));
    mean_term = vshr_n_u32(
        vreinterpret_u32_s64(vget_low_s64(sum_squared)), 7);
    result = vsub_u32(vreinterpret_u32_s64(sse_total), mean_term);

    return vget_lane_u32(result, 0);
}
|
||||
|
||||
/* Variance of the difference between an 8-wide, 16-row source block and a
 * reference block of the same size.
 *
 * source_stride / recon_stride are the distances, in bytes, between the
 * starts of consecutive rows of src_ptr / ref_ptr.
 * The sum of squared differences is written to *sse.
 * Returns *sse minus (sum of differences squared, shifted right by 7).
 */
unsigned int vp8_variance8x16_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int32x4_t diff_sum_acc = vdupq_n_s32(0);  /* running sum of differences */
    int32x4_t sq_acc_lo = vdupq_n_s32(0);     /* squares, low vector halves  */
    int32x4_t sq_acc_hi = vdupq_n_s32(0);     /* squares, high vector halves */
    int64x2_t sum_pairs, sq_pairs, sum_squared;
    int64x1_t sum_total, sse_total;
    uint32x2_t mean_term, result;
    int row_pair, k;

    /* Two rows are consumed per iteration. */
    for (row_pair = 0; row_pair < 8; ++row_pair) {
        uint8x8_t src_row0, src_row1, ref_row0, ref_row1;
        int16x8_t diff[2];

        src_row0 = vld1_u8(src_ptr);
        src_ptr += source_stride;
        src_row1 = vld1_u8(src_ptr);
        src_ptr += source_stride;
        __builtin_prefetch(src_ptr);

        ref_row0 = vld1_u8(ref_ptr);
        ref_ptr += recon_stride;
        ref_row1 = vld1_u8(ref_ptr);
        ref_ptr += recon_stride;
        __builtin_prefetch(ref_ptr);

        /* The widening subtract wraps modulo 2^16; viewing the result as
         * signed 16-bit yields the signed difference. */
        diff[0] = vreinterpretq_s16_u16(vsubl_u8(src_row0, ref_row0));
        diff[1] = vreinterpretq_s16_u16(vsubl_u8(src_row1, ref_row1));

        for (k = 0; k < 2; ++k) {
            const int16x4_t half_lo = vget_low_s16(diff[k]);
            const int16x4_t half_hi = vget_high_s16(diff[k]);

            diff_sum_acc = vpadalq_s16(diff_sum_acc, diff[k]);
            sq_acc_lo = vmlal_s16(sq_acc_lo, half_lo, half_lo);
            sq_acc_hi = vmlal_s16(sq_acc_hi, half_hi, half_hi);
        }
    }

    /* Reduce the per-lane accumulators to one sum and one SSE value. */
    sum_pairs = vpaddlq_s32(diff_sum_acc);
    sq_pairs = vpaddlq_s32(vaddq_s32(sq_acc_hi, sq_acc_lo));
    sum_total = vadd_s64(vget_low_s64(sum_pairs), vget_high_s64(sum_pairs));
    sse_total = vadd_s64(vget_low_s64(sq_pairs), vget_high_s64(sq_pairs));

    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(sse_total), 0);

    sum_squared = vmull_s32(vreinterpret_s32_s64(sum_total),
                            vreinterpret_s32_s64(sum_total));
    mean_term = vshr_n_u32(
        vreinterpret_u32_s64(vget_low_s64(sum_squared)), 7);
    result = vsub_u32(vreinterpret_u32_s64(sse_total), mean_term);

    return vget_lane_u32(result, 0);
}
|
||||
|
||||
/* Variance of the difference between an 8x8 source block and an 8x8
 * reference block.
 *
 * source_stride / recon_stride are the distances, in bytes, between the
 * starts of consecutive rows of src_ptr / ref_ptr.
 * The sum of squared differences is written to *sse.
 * Returns *sse minus (sum of differences squared, shifted right by 6).
 */
unsigned int vp8_variance8x8_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int32x4_t diff_sum_acc = vdupq_n_s32(0);  /* running sum of differences */
    int32x4_t sq_acc_lo = vdupq_n_s32(0);     /* squares, low vector halves  */
    int32x4_t sq_acc_hi = vdupq_n_s32(0);     /* squares, high vector halves */
    int64x2_t sum_pairs, sq_pairs, sum_squared;
    int64x1_t sum_total, sse_total;
    uint32x2_t mean_term, result;
    int row;

    for (row = 0; row < 8; ++row) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);
        /* The widening subtract wraps modulo 2^16; viewing the result as
         * signed 16-bit yields the signed difference. */
        const int16x8_t diff = vreinterpretq_s16_u16(
            vsubl_u8(src_row, ref_row));
        const int16x4_t half_lo = vget_low_s16(diff);
        const int16x4_t half_hi = vget_high_s16(diff);

        diff_sum_acc = vpadalq_s16(diff_sum_acc, diff);
        sq_acc_lo = vmlal_s16(sq_acc_lo, half_lo, half_lo);
        sq_acc_hi = vmlal_s16(sq_acc_hi, half_hi, half_hi);

        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }

    /* Reduce the per-lane accumulators to one sum and one SSE value. */
    sum_pairs = vpaddlq_s32(diff_sum_acc);
    sq_pairs = vpaddlq_s32(vaddq_s32(sq_acc_hi, sq_acc_lo));
    sum_total = vadd_s64(vget_low_s64(sum_pairs), vget_high_s64(sum_pairs));
    sse_total = vadd_s64(vget_low_s64(sq_pairs), vget_high_s64(sq_pairs));

    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(sse_total), 0);

    sum_squared = vmull_s32(vreinterpret_s32_s64(sum_total),
                            vreinterpret_s32_s64(sum_total));
    mean_term = vshr_n_u32(
        vreinterpret_u32_s64(vget_low_s64(sum_squared)), 6);
    result = vsub_u32(vreinterpret_u32_s64(sse_total), mean_term);

    return vget_lane_u32(result, 0);
}
|
||||
@@ -32,7 +32,7 @@ unsigned int vp8_sub_pixel_variance16x16_neon_func(
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528);
|
||||
DECLARE_ALIGNED(16, unsigned char, tmp[528]);
|
||||
unsigned char *tmpp;
|
||||
unsigned char *tmpp2;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
|
||||
@@ -911,12 +911,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_neon(
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
enum { kWidth8 = 8 };
|
||||
enum { kHeight8 = 8 };
|
||||
enum { kHeight8PlusOne = 9 };
|
||||
enum { kPixelStepOne = 1 };
|
||||
enum { kAlign16 = 16 };
|
||||
|
||||
#define FILTER_BITS 7
|
||||
|
||||
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
|
||||
@@ -968,8 +962,8 @@ static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride,
|
||||
unsigned int *sse) {
|
||||
int sum;
|
||||
variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
|
||||
return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
|
||||
variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
|
||||
return *sse - (((int64_t)sum * sum) / (8 * 8));
|
||||
}
|
||||
|
||||
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
||||
@@ -1003,22 +997,21 @@ unsigned int vp8_sub_pixel_variance8x8_neon(
|
||||
const unsigned char *dst,
|
||||
int dst_stride,
|
||||
unsigned int *sse) {
|
||||
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8);
|
||||
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8);
|
||||
DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]);
|
||||
DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
|
||||
if (xoffset == 0) {
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8,
|
||||
kWidth8, bilinear_taps_coeff[yoffset]);
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8,
|
||||
8, bilinear_taps_coeff[yoffset]);
|
||||
} else if (yoffset == 0) {
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne,
|
||||
kHeight8PlusOne, kWidth8,
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, 1,
|
||||
9, 8,
|
||||
bilinear_taps_coeff[xoffset]);
|
||||
} else {
|
||||
var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne,
|
||||
kHeight8PlusOne, kWidth8,
|
||||
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
|
||||
9, 8,
|
||||
bilinear_taps_coeff[xoffset]);
|
||||
var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8,
|
||||
kWidth8, bilinear_taps_coeff[yoffset]);
|
||||
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
|
||||
8, bilinear_taps_coeff[yoffset]);
|
||||
}
|
||||
return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse);
|
||||
return variance8x8_neon(temp2, 8, dst, dst_stride, sse);
|
||||
}
|
||||
|
||||
|
||||
@@ -9,10 +9,14 @@
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vp8/common/filter.h"
|
||||
|
||||
// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder
|
||||
#if CONFIG_VP8_ENCODER
|
||||
|
||||
#if HAVE_MEDIA
|
||||
#include "vp8/common/arm/bilinearfilter_arm.h"
|
||||
|
||||
@@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
8, 8, 8, VFilter);
|
||||
|
||||
return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
return vpx_variance8x8_media(second_pass, 8, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
@@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
16, 16, 16, VFilter);
|
||||
|
||||
var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
var = vpx_variance16x16_media(second_pass, 16, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
return var;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MEDIA */
|
||||
#endif // HAVE_MEDIA
|
||||
|
||||
|
||||
#if HAVE_NEON
|
||||
@@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon
|
||||
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // HAVE_NEON
|
||||
#endif // CONFIG_VP8_ENCODER
|
||||
|
||||
@@ -187,8 +187,12 @@ typedef struct
|
||||
{
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frames */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
} LOWER_RES_FRAME_INFO;
|
||||
#endif
|
||||
|
||||
@@ -29,19 +29,19 @@ extern "C" {
|
||||
|
||||
#define vp8_copy( Dest, Src) { \
|
||||
assert( sizeof( Dest) == sizeof( Src)); \
|
||||
vpx_memcpy( Dest, Src, sizeof( Src)); \
|
||||
memcpy( Dest, Src, sizeof( Src)); \
|
||||
}
|
||||
|
||||
/* Use this for variably-sized arrays. */
|
||||
|
||||
#define vp8_copy_array( Dest, Src, N) { \
|
||||
assert( sizeof( *Dest) == sizeof( *Src)); \
|
||||
vpx_memcpy( Dest, Src, N * sizeof( *Src)); \
|
||||
memcpy( Dest, Src, N * sizeof( *Src)); \
|
||||
}
|
||||
|
||||
#define vp8_zero( Dest) vpx_memset( &Dest, 0, sizeof( Dest));
|
||||
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
|
||||
|
||||
#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest));
|
||||
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Copy a 32-byte-wide strip of `height` rows from src_ptr to dst_ptr.
 *
 * src_stride / dst_stride are the distances, in bytes, between the starts
 * of consecutive rows in the source and destination buffers. Exactly 32
 * bytes are copied per row; nothing is copied when height is not positive.
 */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride,
                    int height)
{
    const unsigned char *src_row = src_ptr;
    unsigned char *dst_row = dst_ptr;
    int rows_left;

    for (rows_left = height; rows_left > 0; --rows_left)
    {
        memcpy(dst_row, src_row, 32);

        src_row += src_stride;
        dst_row += dst_stride;
    }
}
|
||||
@@ -81,7 +81,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the block modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
@@ -129,7 +128,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
||||
|
||||
|
||||
/* print out the block modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
@@ -38,6 +38,6 @@ void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
|
||||
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
memset(input, 0, 32);
|
||||
|
||||
}
|
||||
|
||||
@@ -183,7 +183,6 @@ const vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
|
||||
void vp8_default_coef_probs(VP8_COMMON *pc)
|
||||
{
|
||||
vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
|
||||
sizeof(default_coef_probs));
|
||||
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
|
||||
}
|
||||
|
||||
|
||||
@@ -159,13 +159,13 @@ const vp8_tree_index vp8_small_mvtree [14] =
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x)
|
||||
{
|
||||
vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
}
|
||||
|
||||
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
|
||||
{
|
||||
vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
}
|
||||
|
||||
|
||||
@@ -40,9 +40,9 @@ static void copy_and_extend_plane
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], er);
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
@@ -60,13 +60,13 @@ static void copy_and_extend_plane
|
||||
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
|
||||
@@ -33,7 +33,7 @@ void vp8_dequant_idct_add_y_block_c
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
@@ -59,7 +59,7 @@ void vp8_dequant_idct_add_uv_block_c
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
@@ -78,7 +78,7 @@ void vp8_dequant_idct_add_uv_block_c
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Notes:
|
||||
|
||||
@@ -82,11 +82,10 @@ void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
if (block_inside_limit < 1)
|
||||
block_inside_limit = 1;
|
||||
|
||||
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||
memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
|
||||
memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,7 +104,7 @@ void vp8_loop_filter_init(VP8_COMMON *cm)
|
||||
/* init hev threshold const vectors */
|
||||
for(i = 0; i < 4 ; i++)
|
||||
{
|
||||
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,8 +141,8 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
else /* Delta Value */
|
||||
{
|
||||
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||
}
|
||||
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||
}
|
||||
|
||||
if (!mbd->mode_ref_lf_delta_enabled)
|
||||
@@ -151,7 +150,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
/* we could get rid of this if we assume that deltas are set to
|
||||
* zero when not in use; encoder always uses deltas
|
||||
*/
|
||||
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||
memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -261,6 +260,7 @@ void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
|
||||
int mb_col;
|
||||
int filter_level;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
(void)post_uvstride;
|
||||
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
|
||||
@@ -17,10 +17,11 @@
|
||||
* higher quality.
|
||||
*/
|
||||
|
||||
#include "postproc.h"
|
||||
#include "variance.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include <limits.h>
|
||||
@@ -150,36 +151,36 @@ static void multiframe_quality_enhance_block
|
||||
|
||||
if (blksize == 16)
|
||||
{
|
||||
actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
#ifdef USE_SSD
|
||||
sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128)>>8;
|
||||
usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32)>>6;
|
||||
vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32)>>6;
|
||||
#else
|
||||
sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
|
||||
usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
|
||||
vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
|
||||
#endif
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
#ifdef USE_SSD
|
||||
sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32)>>6;
|
||||
usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8)>>4;
|
||||
vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8)>>4;
|
||||
#else
|
||||
sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
|
||||
usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
|
||||
vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -231,9 +232,9 @@ static void multiframe_quality_enhance_block
|
||||
{
|
||||
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
|
||||
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
|
||||
vpx_memcpy(udp, up, uvblksize);
|
||||
memcpy(udp, up, uvblksize);
|
||||
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
|
||||
vpx_memcpy(vdp, vp, uvblksize);
|
||||
memcpy(vdp, vp, uvblksize);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -341,8 +342,8 @@ void vp8_multiframe_quality_enhance
|
||||
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
|
||||
vp += show->uv_stride, vdp += dest->uv_stride)
|
||||
{
|
||||
vpx_memcpy(udp, up, 4);
|
||||
vpx_memcpy(vdp, vp, 4);
|
||||
memcpy(udp, up, 4);
|
||||
memcpy(vdp, vp, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,6 +122,7 @@ extern "C"
|
||||
int Sharpness;
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
unsigned int screen_content_mode;
|
||||
|
||||
/* mode ->
|
||||
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
|
||||
|
||||
@@ -355,8 +355,8 @@ void vp8_deblock(VP8_COMMON *cm,
|
||||
else
|
||||
mb_ppl = (unsigned char)ppl;
|
||||
|
||||
vpx_memset(ylptr, mb_ppl, 16);
|
||||
vpx_memset(uvlptr, mb_ppl, 8);
|
||||
memset(ylptr, mb_ppl, 16);
|
||||
memset(uvlptr, mb_ppl, 8);
|
||||
|
||||
ylptr += 16;
|
||||
uvlptr += 8;
|
||||
@@ -403,7 +403,7 @@ void vp8_de_noise(VP8_COMMON *cm,
|
||||
(void) low_var_thresh;
|
||||
(void) flag;
|
||||
|
||||
vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols);
|
||||
memset(limits, (unsigned char)ppl, 16 * mb_cols);
|
||||
|
||||
/* TODO: The original code don't filter the 2 outer rows and columns. */
|
||||
for (mbr = 0; mbr < mb_rows; mbr++)
|
||||
@@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm,
|
||||
}
|
||||
}
|
||||
|
||||
double vp8_gaussian(double sigma, double mu, double x)
|
||||
static double gaussian(double sigma, double mu, double x)
|
||||
{
|
||||
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
|
||||
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
|
||||
@@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
|
||||
|
||||
for (i = -32; i < 32; i++)
|
||||
{
|
||||
const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
|
||||
const int v = (int)(.5 + 256 * gaussian(sigma, 0, i));
|
||||
|
||||
if (v)
|
||||
{
|
||||
@@ -518,6 +518,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
||||
unsigned int Width, unsigned int Height, int Pitch)
|
||||
{
|
||||
unsigned int i, j;
|
||||
(void)bothclamp;
|
||||
|
||||
for (i = 0; i < Height; i++)
|
||||
{
|
||||
@@ -762,7 +763,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
/* insure that postproc is set to all 0's so that post proc
|
||||
* doesn't pull random data in from edge
|
||||
*/
|
||||
vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
|
||||
memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
@@ -30,31 +32,8 @@ void vp8_copy_mem16x16_c(
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
dst[8] = src[8];
|
||||
dst[9] = src[9];
|
||||
dst[10] = src[10];
|
||||
dst[11] = src[11];
|
||||
dst[12] = src[12];
|
||||
dst[13] = src[13];
|
||||
dst[14] = src[14];
|
||||
dst[15] = src[15];
|
||||
memcpy(dst, src, 16);
|
||||
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
|
||||
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
|
||||
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
@@ -72,19 +51,8 @@ void vp8_copy_mem8x8_c(
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
@@ -102,19 +70,8 @@ void vp8_copy_mem8x4_c(
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
|
||||
@@ -70,10 +70,10 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
|
||||
expected_dc = 128;
|
||||
}
|
||||
|
||||
/*vpx_memset(ypred_ptr, expected_dc, 256);*/
|
||||
/*memset(ypred_ptr, expected_dc, 256);*/
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
vpx_memset(ypred_ptr, expected_dc, 16);
|
||||
memset(ypred_ptr, expected_dc, 16);
|
||||
ypred_ptr += y_stride;
|
||||
}
|
||||
}
|
||||
@@ -98,7 +98,7 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
|
||||
vpx_memset(ypred_ptr, yleft_col[r], 16);
|
||||
memset(ypred_ptr, yleft_col[r], 16);
|
||||
ypred_ptr += y_stride;
|
||||
}
|
||||
|
||||
@@ -202,12 +202,12 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
}
|
||||
|
||||
|
||||
/*vpx_memset(upred_ptr,expected_udc,64);*/
|
||||
/*vpx_memset(vpred_ptr,expected_vdc,64);*/
|
||||
/*memset(upred_ptr,expected_udc,64);*/
|
||||
/*memset(vpred_ptr,expected_vdc,64);*/
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
vpx_memset(upred_ptr, expected_udc, 8);
|
||||
vpx_memset(vpred_ptr, expected_vdc, 8);
|
||||
memset(upred_ptr, expected_udc, 8);
|
||||
memset(vpred_ptr, expected_vdc, 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
@@ -217,8 +217,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
vpx_memcpy(upred_ptr, uabove_row, 8);
|
||||
vpx_memcpy(vpred_ptr, vabove_row, 8);
|
||||
memcpy(upred_ptr, uabove_row, 8);
|
||||
memcpy(vpred_ptr, vabove_row, 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
@@ -229,8 +229,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
vpx_memset(upred_ptr, uleft_col[i], 8);
|
||||
vpx_memset(vpred_ptr, vleft_col[i], 8);
|
||||
memset(upred_ptr, uleft_col[i], 8);
|
||||
memset(vpred_ptr, vleft_col[i], 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
|
||||
@@ -7,15 +7,13 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#define RTCD_C
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
extern void vpx_scale_rtcd(void);
|
||||
|
||||
void vp8_rtcd()
|
||||
{
|
||||
vpx_scale_rtcd();
|
||||
once(setup_rtcd_internal);
|
||||
}
|
||||
|
||||
@@ -1,302 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad, int m, int n)
|
||||
{
|
||||
int r, c;
|
||||
unsigned int sad = 0;
|
||||
|
||||
for (r = 0; r < n; r++)
|
||||
{
|
||||
for (c = 0; c < m; c++)
|
||||
{
|
||||
sad += abs(src_ptr[c] - ref_ptr[c]);
|
||||
}
|
||||
|
||||
if (sad > max_sad)
|
||||
break;
|
||||
|
||||
src_ptr += src_stride;
|
||||
ref_ptr += ref_stride;
|
||||
}
|
||||
|
||||
return sad;
|
||||
}
|
||||
|
||||
/* max_sad is provided as an optional optimization point. Alternative
|
||||
* implementations of these functions are not required to check it.
|
||||
*/
|
||||
|
||||
unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad)
|
||||
{
|
||||
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16);
|
||||
}
|
||||
|
||||
unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad)
|
||||
{
|
||||
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8);
|
||||
}
|
||||
|
||||
unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad)
|
||||
{
|
||||
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8);
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad)
|
||||
{
|
||||
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16);
|
||||
}
|
||||
|
||||
unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int max_sad)
|
||||
{
|
||||
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned short *sad_array)
|
||||
{
|
||||
sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
|
||||
sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
|
||||
sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
|
||||
sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
|
||||
sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned short *sad_array)
|
||||
{
|
||||
sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
|
||||
sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
|
||||
sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
|
||||
sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
|
||||
sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned short *sad_array)
|
||||
{
|
||||
sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
|
||||
sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
|
||||
sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
|
||||
sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
|
||||
sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned short *sad_array)
|
||||
{
|
||||
sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
|
||||
sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
|
||||
sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
|
||||
sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
|
||||
sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char *ref_ptr, int ref_stride,
|
||||
unsigned short *sad_array)
|
||||
{
|
||||
sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
|
||||
sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
|
||||
sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
|
||||
sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
|
||||
sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
|
||||
sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
|
||||
sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
|
||||
sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char * const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
|
||||
sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char * const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
|
||||
sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char * const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
|
||||
sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char * const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
|
||||
sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
|
||||
const unsigned char * const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array)
|
||||
{
|
||||
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
|
||||
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
|
||||
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
|
||||
sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
|
||||
}
|
||||
|
||||
/* Copy 2 macroblocks to a buffer */
|
||||
void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
|
||||
unsigned char *dst_ptr, int dst_stride,
|
||||
int height)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < height; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst_ptr[0] = src_ptr[0];
|
||||
dst_ptr[1] = src_ptr[1];
|
||||
dst_ptr[2] = src_ptr[2];
|
||||
dst_ptr[3] = src_ptr[3];
|
||||
dst_ptr[4] = src_ptr[4];
|
||||
dst_ptr[5] = src_ptr[5];
|
||||
dst_ptr[6] = src_ptr[6];
|
||||
dst_ptr[7] = src_ptr[7];
|
||||
dst_ptr[8] = src_ptr[8];
|
||||
dst_ptr[9] = src_ptr[9];
|
||||
dst_ptr[10] = src_ptr[10];
|
||||
dst_ptr[11] = src_ptr[11];
|
||||
dst_ptr[12] = src_ptr[12];
|
||||
dst_ptr[13] = src_ptr[13];
|
||||
dst_ptr[14] = src_ptr[14];
|
||||
dst_ptr[15] = src_ptr[15];
|
||||
dst_ptr[16] = src_ptr[16];
|
||||
dst_ptr[17] = src_ptr[17];
|
||||
dst_ptr[18] = src_ptr[18];
|
||||
dst_ptr[19] = src_ptr[19];
|
||||
dst_ptr[20] = src_ptr[20];
|
||||
dst_ptr[21] = src_ptr[21];
|
||||
dst_ptr[22] = src_ptr[22];
|
||||
dst_ptr[23] = src_ptr[23];
|
||||
dst_ptr[24] = src_ptr[24];
|
||||
dst_ptr[25] = src_ptr[25];
|
||||
dst_ptr[26] = src_ptr[26];
|
||||
dst_ptr[27] = src_ptr[27];
|
||||
dst_ptr[28] = src_ptr[28];
|
||||
dst_ptr[29] = src_ptr[29];
|
||||
dst_ptr[30] = src_ptr[30];
|
||||
dst_ptr[31] = src_ptr[31];
|
||||
#else
|
||||
((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0] ;
|
||||
((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1] ;
|
||||
((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2] ;
|
||||
((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3] ;
|
||||
((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4] ;
|
||||
((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5] ;
|
||||
((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6] ;
|
||||
((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7] ;
|
||||
#endif
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
}
|
||||
}
|
||||
@@ -17,15 +17,15 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
|
||||
int i;
|
||||
|
||||
/* set up frame new frame for intra coded blocks */
|
||||
vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
for (i = 0; i < ybf->y_height; i++)
|
||||
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
@@ -33,7 +33,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
}
|
||||
|
||||
@@ -14,50 +14,42 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned int(*vp8_sad_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
typedef unsigned int(*vpx_sad_fn_t)(
|
||||
const uint8_t *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned int max_sad);
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride);
|
||||
|
||||
typedef void (*vp8_copy32xn_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
int n);
|
||||
|
||||
typedef void (*vp8_sad_multi_fn_t)(
|
||||
typedef void (*vpx_sad_multi_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
const unsigned char *ref_array,
|
||||
int ref_stride,
|
||||
unsigned int *sad_array);
|
||||
|
||||
typedef void (*vp8_sad_multi1_fn_t)
|
||||
typedef void (*vpx_sad_multi_d_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned short *sad_array
|
||||
);
|
||||
|
||||
typedef void (*vp8_sad_multi_d_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char * const ref_ptr[],
|
||||
const unsigned char * const ref_array[],
|
||||
int ref_stride,
|
||||
unsigned int *sad_array
|
||||
);
|
||||
|
||||
typedef unsigned int (*vp8_variance_fn_t)
|
||||
typedef unsigned int (*vpx_variance_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
@@ -77,40 +69,17 @@ typedef unsigned int (*vp8_subpixvariance_fn_t)
|
||||
unsigned int *sse
|
||||
);
|
||||
|
||||
typedef void (*vp8_ssimpf_fn_t)
|
||||
(
|
||||
unsigned char *s,
|
||||
int sp,
|
||||
unsigned char *r,
|
||||
int rp,
|
||||
unsigned long *sum_s,
|
||||
unsigned long *sum_r,
|
||||
unsigned long *sum_sq_s,
|
||||
unsigned long *sum_sq_r,
|
||||
unsigned long *sum_sxr
|
||||
);
|
||||
|
||||
typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
|
||||
|
||||
typedef unsigned int (*vp8_get16x16prederror_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride
|
||||
);
|
||||
|
||||
typedef struct variance_vtable
|
||||
{
|
||||
vp8_sad_fn_t sdf;
|
||||
vp8_variance_fn_t vf;
|
||||
vpx_sad_fn_t sdf;
|
||||
vpx_variance_fn_t vf;
|
||||
vp8_subpixvariance_fn_t svf;
|
||||
vp8_variance_fn_t svf_halfpix_h;
|
||||
vp8_variance_fn_t svf_halfpix_v;
|
||||
vp8_variance_fn_t svf_halfpix_hv;
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
vpx_variance_fn_t svf_halfpix_h;
|
||||
vpx_variance_fn_t svf_halfpix_v;
|
||||
vpx_variance_fn_t svf_halfpix_hv;
|
||||
vpx_sad_multi_fn_t sdx3f;
|
||||
vpx_sad_multi_fn_t sdx8f;
|
||||
vpx_sad_multi_d_fn_t sdx4df;
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_copy32xn_fn_t copymem;
|
||||
#endif
|
||||
|
||||
@@ -8,43 +8,34 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "variance.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "filter.h"
|
||||
#include "variance.h"
|
||||
|
||||
|
||||
unsigned int vp8_get_mb_ss_c
|
||||
(
|
||||
const short *src_ptr
|
||||
)
|
||||
{
|
||||
unsigned int i = 0, sum = 0;
|
||||
|
||||
do
|
||||
{
|
||||
sum += (src_ptr[i] * src_ptr[i]);
|
||||
i++;
|
||||
}
|
||||
while (i < 256);
|
||||
|
||||
return sum;
|
||||
/* This is a bad idea.
|
||||
* ctz = count trailing zeros */
|
||||
static int ctz(int a) {
|
||||
int b = 0;
|
||||
while (a != 1) {
|
||||
a >>= 1;
|
||||
b++;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
|
||||
static void variance(
|
||||
static unsigned int variance(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
int w,
|
||||
int h,
|
||||
unsigned int *sse,
|
||||
int *sum)
|
||||
unsigned int *sse)
|
||||
{
|
||||
int i, j;
|
||||
int diff;
|
||||
int diff, sum;
|
||||
|
||||
*sum = 0;
|
||||
sum = 0;
|
||||
*sse = 0;
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
@@ -52,114 +43,17 @@ static void variance(
|
||||
for (j = 0; j < w; j++)
|
||||
{
|
||||
diff = src_ptr[j] - ref_ptr[j];
|
||||
*sum += diff;
|
||||
sum += diff;
|
||||
*sse += diff * diff;
|
||||
}
|
||||
|
||||
src_ptr += source_stride;
|
||||
ref_ptr += recon_stride;
|
||||
}
|
||||
|
||||
return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h)))));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance16x8_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance8x8_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance4x4_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_mse16x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
@@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c
|
||||
/* Now filter Verticaly */
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
|
||||
|
||||
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
|
||||
return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse);
|
||||
}
|
||||
|
||||
|
||||
@@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
|
||||
|
||||
return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_c
|
||||
@@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
|
||||
|
||||
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse);
|
||||
}
|
||||
|
||||
|
||||
@@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_c
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_c
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
|
||||
|
||||
return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance8x16_c
|
||||
@@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
|
||||
|
||||
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse2(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;dst_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;dst_stride
|
||||
movsxd rcx, dword ptr arg(4) ;height
|
||||
|
||||
.block_copy_sse2_loopx4:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm2, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqu xmm4, XMMWORD PTR [rsi]
|
||||
movdqu xmm5, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm6, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm2
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm4
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm5
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm6
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
sub rcx, 4
|
||||
cmp rcx, 4
|
||||
jge .block_copy_sse2_loopx4
|
||||
|
||||
cmp rcx, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse2_loop:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
lea rsi, [rsi+rax]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
lea rdi, [rdi+rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jne .block_copy_sse2_loop
|
||||
|
||||
.copy_is_done:
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
@@ -0,0 +1,146 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro STACK_FRAME_CREATE_X3 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define ref_ptr rdi
|
||||
%define ref_stride rdx
|
||||
%define end_ptr rcx
|
||||
%define ret_var rbx
|
||||
%define result_ptr arg(4)
|
||||
%define max_sad arg(4)
|
||||
%define height dword ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
mov rdi, arg(2) ; ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ; src_stride
|
||||
movsxd rdx, dword ptr arg(3) ; ref_stride
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define ref_ptr r8
|
||||
%define ref_stride r9
|
||||
%define end_ptr r10
|
||||
%define ret_var r11
|
||||
%define result_ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%define max_sad [rsp+xmm_stack_space+8+4*8]
|
||||
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define ref_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define end_ptr r9
|
||||
%define ret_var r10
|
||||
%define result_ptr r8
|
||||
%define max_sad r8
|
||||
%define height r8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY_X3 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define ref_ptr
|
||||
%define ref_stride
|
||||
%define end_ptr
|
||||
%define ret_var
|
||||
%define result_ptr
|
||||
%define max_sad
|
||||
%define height
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
.block_copy_sse3_loopx4:
|
||||
lea end_ptr, [src_ptr+src_stride*2]
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
||||
movdqu xmm4, XMMWORD PTR [end_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
||||
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
||||
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*4]
|
||||
|
||||
lea end_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
||||
movdqa XMMWORD PTR [end_ptr], xmm4
|
||||
movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
||||
|
||||
lea ref_ptr, [ref_ptr+ref_stride*4]
|
||||
|
||||
sub height, 4
|
||||
cmp height, 4
|
||||
jge .block_copy_sse3_loopx4
|
||||
|
||||
;Check whether there are more rows that need to be copied.
|
||||
cmp height, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse3_loop:
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
lea src_ptr, [src_ptr+src_stride]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
lea ref_ptr, [ref_ptr+ref_stride]
|
||||
|
||||
sub height, 1
|
||||
jne .block_copy_sse3_loop
|
||||
|
||||
.copy_is_done:
|
||||
STACK_FRAME_DESTROY_X3
|
||||
@@ -36,7 +36,7 @@ void vp8_dequant_idct_add_y_block_mmx
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
@@ -45,7 +45,7 @@ void vp8_dequant_idct_add_y_block_mmx
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
|
||||
dst+4, stride);
|
||||
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
@@ -54,7 +54,7 @@ void vp8_dequant_idct_add_y_block_mmx
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
|
||||
dst+8, stride);
|
||||
vpx_memset(q + 32, 0, 2 * sizeof(q[0]));
|
||||
memset(q + 32, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
@@ -63,7 +63,7 @@ void vp8_dequant_idct_add_y_block_mmx
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
|
||||
dst+12, stride);
|
||||
vpx_memset(q + 48, 0, 2 * sizeof(q[0]));
|
||||
memset(q + 48, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 64;
|
||||
@@ -85,7 +85,7 @@ void vp8_dequant_idct_add_uv_block_mmx
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
@@ -94,7 +94,7 @@ void vp8_dequant_idct_add_uv_block_mmx
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
@@ -109,7 +109,7 @@ void vp8_dequant_idct_add_uv_block_mmx
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
vpx_memset(q, 0, 2 * sizeof(q[0]));
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
@@ -118,7 +118,7 @@ void vp8_dequant_idct_add_uv_block_mmx
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
@@ -1,410 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;unsigned int vp8_sad16x16_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad16x16_wmt) PRIVATE
|
||||
sym(vp8_sad16x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM 6
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
lea rcx, [rsi+rax*8]
|
||||
|
||||
lea rcx, [rcx+rax*8]
|
||||
pxor xmm6, xmm6
|
||||
|
||||
.x16x16sad_wmt_loop:
|
||||
|
||||
movq xmm0, QWORD PTR [rsi]
|
||||
movq xmm2, QWORD PTR [rsi+8]
|
||||
|
||||
movq xmm1, QWORD PTR [rdi]
|
||||
movq xmm3, QWORD PTR [rdi+8]
|
||||
|
||||
movq xmm4, QWORD PTR [rsi+rax]
|
||||
movq xmm5, QWORD PTR [rdi+rdx]
|
||||
|
||||
|
||||
punpcklbw xmm0, xmm2
|
||||
punpcklbw xmm1, xmm3
|
||||
|
||||
psadbw xmm0, xmm1
|
||||
movq xmm2, QWORD PTR [rsi+rax+8]
|
||||
|
||||
movq xmm3, QWORD PTR [rdi+rdx+8]
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
punpcklbw xmm4, xmm2
|
||||
|
||||
punpcklbw xmm5, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
|
||||
paddw xmm6, xmm0
|
||||
paddw xmm6, xmm4
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x16x16sad_wmt_loop
|
||||
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movq rax, xmm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;unsigned int vp8_sad8x16_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
global sym(vp8_sad8x16_wmt) PRIVATE
|
||||
sym(vp8_sad8x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
lea rcx, [rsi+rbx*8]
|
||||
|
||||
lea rcx, [rcx+rbx*8]
|
||||
pxor mm7, mm7
|
||||
|
||||
.x8x16sad_wmt_loop:
|
||||
|
||||
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x8x16sad_wmt_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [rsi]
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
|
||||
movq mm2, QWORD PTR [rsi+rbx]
|
||||
movq mm3, QWORD PTR [rdi+rdx]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
lea rsi, [rsi+rbx*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm2
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x8x16sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
|
||||
.x8x16sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vp8_sad8x8_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
global sym(vp8_sad8x8_wmt) PRIVATE
|
||||
sym(vp8_sad8x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
lea rcx, [rsi+rbx*8]
|
||||
pxor mm7, mm7
|
||||
|
||||
.x8x8sad_wmt_loop:
|
||||
|
||||
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x8x8sad_wmt_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [rsi]
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
|
||||
psadbw mm0, mm1
|
||||
lea rsi, [rsi+rbx]
|
||||
|
||||
add rdi, rdx
|
||||
paddw mm7, mm0
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x8x8sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
.x8x8sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;unsigned int vp8_sad4x4_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad4x4_wmt) PRIVATE
|
||||
sym(vp8_sad4x4_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
movd mm0, DWORD PTR [rsi]
|
||||
movd mm1, DWORD PTR [rdi]
|
||||
|
||||
movd mm2, DWORD PTR [rsi+rax]
|
||||
movd mm3, DWORD PTR [rdi+rdx]
|
||||
|
||||
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
psadbw mm0, mm1
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
movd mm4, DWORD PTR [rsi]
|
||||
|
||||
movd mm5, DWORD PTR [rdi]
|
||||
movd mm6, DWORD PTR [rsi+rax]
|
||||
|
||||
movd mm7, DWORD PTR [rdi+rdx]
|
||||
punpcklbw mm4, mm6
|
||||
|
||||
punpcklbw mm5, mm7
|
||||
psadbw mm4, mm5
|
||||
|
||||
paddw mm0, mm4
|
||||
movq rax, mm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vp8_sad16x8_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
global sym(vp8_sad16x8_wmt) PRIVATE
|
||||
sym(vp8_sad16x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
lea rcx, [rsi+rbx*8]
|
||||
pxor mm7, mm7
|
||||
|
||||
.x16x8sad_wmt_loop:
|
||||
|
||||
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x16x8sad_wmt_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [rsi]
|
||||
movq mm2, QWORD PTR [rsi+8]
|
||||
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
movq mm3, QWORD PTR [rdi+8]
|
||||
|
||||
movq mm4, QWORD PTR [rsi+rbx]
|
||||
movq mm5, QWORD PTR [rdi+rdx]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
movq mm1, QWORD PTR [rsi+rbx+8]
|
||||
movq mm3, QWORD PTR [rdi+rdx+8]
|
||||
|
||||
psadbw mm4, mm5
|
||||
psadbw mm1, mm3
|
||||
|
||||
lea rsi, [rsi+rbx*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
paddw mm0, mm2
|
||||
paddw mm4, mm1
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm4
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x16x8sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
|
||||
.x16x8sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_copy32xn_sse2(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;dst_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;dst_stride
|
||||
movsxd rcx, dword ptr arg(4) ;height
|
||||
|
||||
.block_copy_sse2_loopx4:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm2, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqu xmm4, XMMWORD PTR [rsi]
|
||||
movdqu xmm5, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm6, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm2
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm4
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm5
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm6
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
sub rcx, 4
|
||||
cmp rcx, 4
|
||||
jge .block_copy_sse2_loopx4
|
||||
|
||||
cmp rcx, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse2_loop:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
lea rsi, [rsi+rax]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
lea rdi, [rdi+rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jne .block_copy_sse2_loop
|
||||
|
||||
.copy_is_done:
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
@@ -1,960 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro STACK_FRAME_CREATE_X3 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define ref_ptr rdi
|
||||
%define ref_stride rdx
|
||||
%define end_ptr rcx
|
||||
%define ret_var rbx
|
||||
%define result_ptr arg(4)
|
||||
%define max_sad arg(4)
|
||||
%define height dword ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
mov rdi, arg(2) ; ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ; src_stride
|
||||
movsxd rdx, dword ptr arg(3) ; ref_stride
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define ref_ptr r8
|
||||
%define ref_stride r9
|
||||
%define end_ptr r10
|
||||
%define ret_var r11
|
||||
%define result_ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%define max_sad [rsp+xmm_stack_space+8+4*8]
|
||||
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define ref_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define end_ptr r9
|
||||
%define ret_var r10
|
||||
%define result_ptr r8
|
||||
%define max_sad r8
|
||||
%define height r8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY_X3 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define ref_ptr
|
||||
%define ref_stride
|
||||
%define end_ptr
|
||||
%define ret_var
|
||||
%define result_ptr
|
||||
%define max_sad
|
||||
%define height
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_CREATE_X4 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define r0_ptr rcx
|
||||
%define r1_ptr rdx
|
||||
%define r2_ptr rbx
|
||||
%define r3_ptr rdi
|
||||
%define ref_stride rbp
|
||||
%define result_ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
push rbp
|
||||
mov rdi, arg(2) ; ref_ptr_base
|
||||
|
||||
LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ; src_stride
|
||||
movsxd rbp, dword ptr arg(3) ; ref_stride
|
||||
|
||||
xchg rbx, rax
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define r0_ptr rsi
|
||||
%define r1_ptr r10
|
||||
%define r2_ptr r11
|
||||
%define r3_ptr r8
|
||||
%define ref_stride r9
|
||||
%define result_ptr [rsp+xmm_stack_space+16+4*8]
|
||||
push rsi
|
||||
|
||||
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define r0_ptr r9
|
||||
%define r1_ptr r10
|
||||
%define r2_ptr r11
|
||||
%define r3_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define result_ptr r8
|
||||
|
||||
LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
||||
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY_X4 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define r0_ptr
|
||||
%define r1_ptr
|
||||
%define r2_ptr
|
||||
%define r3_ptr
|
||||
%define ref_stride
|
||||
%define result_ptr
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_16X2X3 5
|
||||
%if %1==0
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm5, XMMWORD PTR [%3]
|
||||
lddqu xmm6, XMMWORD PTR [%3+1]
|
||||
lddqu xmm7, XMMWORD PTR [%3+2]
|
||||
|
||||
psadbw xmm5, xmm0
|
||||
psadbw xmm6, xmm0
|
||||
psadbw xmm7, xmm0
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm1, XMMWORD PTR [%3]
|
||||
lddqu xmm2, XMMWORD PTR [%3+1]
|
||||
lddqu xmm3, XMMWORD PTR [%3+2]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm5, xmm1
|
||||
paddw xmm6, xmm2
|
||||
paddw xmm7, xmm3
|
||||
%endif
|
||||
movdqa xmm0, XMMWORD PTR [%2+%4]
|
||||
lddqu xmm1, XMMWORD PTR [%3+%5]
|
||||
lddqu xmm2, XMMWORD PTR [%3+%5+1]
|
||||
lddqu xmm3, XMMWORD PTR [%3+%5+2]
|
||||
|
||||
%if %1==0 || %1==1
|
||||
lea %2, [%2+%4*2]
|
||||
lea %3, [%3+%5*2]
|
||||
%endif
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm5, xmm1
|
||||
paddw xmm6, xmm2
|
||||
paddw xmm7, xmm3
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_8X2X3 5
|
||||
%if %1==0
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm5, QWORD PTR [%3]
|
||||
movq mm6, QWORD PTR [%3+1]
|
||||
movq mm7, QWORD PTR [%3+2]
|
||||
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
psadbw mm7, mm0
|
||||
%else
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm1, QWORD PTR [%3]
|
||||
movq mm2, QWORD PTR [%3+1]
|
||||
movq mm3, QWORD PTR [%3+2]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm5, mm1
|
||||
paddw mm6, mm2
|
||||
paddw mm7, mm3
|
||||
%endif
|
||||
movq mm0, QWORD PTR [%2+%4]
|
||||
movq mm1, QWORD PTR [%3+%5]
|
||||
movq mm2, QWORD PTR [%3+%5+1]
|
||||
movq mm3, QWORD PTR [%3+%5+2]
|
||||
|
||||
%if %1==0 || %1==1
|
||||
lea %2, [%2+%4*2]
|
||||
lea %3, [%3+%5*2]
|
||||
%endif
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm5, mm1
|
||||
paddw mm6, mm2
|
||||
paddw mm7, mm3
|
||||
%endmacro
|
||||
|
||||
%macro LOAD_X4_ADDRESSES 5
|
||||
mov %2, [%1+REG_SZ_BYTES*0]
|
||||
mov %3, [%1+REG_SZ_BYTES*1]
|
||||
|
||||
mov %4, [%1+REG_SZ_BYTES*2]
|
||||
mov %5, [%1+REG_SZ_BYTES*3]
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_16X2X4 8
|
||||
%if %1==0
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm4, XMMWORD PTR [%3]
|
||||
lddqu xmm5, XMMWORD PTR [%4]
|
||||
lddqu xmm6, XMMWORD PTR [%5]
|
||||
lddqu xmm7, XMMWORD PTR [%6]
|
||||
|
||||
psadbw xmm4, xmm0
|
||||
psadbw xmm5, xmm0
|
||||
psadbw xmm6, xmm0
|
||||
psadbw xmm7, xmm0
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm1, XMMWORD PTR [%3]
|
||||
lddqu xmm2, XMMWORD PTR [%4]
|
||||
lddqu xmm3, XMMWORD PTR [%5]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm4, xmm1
|
||||
lddqu xmm1, XMMWORD PTR [%6]
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm6, xmm3
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
paddw xmm7, xmm1
|
||||
%endif
|
||||
movdqa xmm0, XMMWORD PTR [%2+%7]
|
||||
lddqu xmm1, XMMWORD PTR [%3+%8]
|
||||
lddqu xmm2, XMMWORD PTR [%4+%8]
|
||||
lddqu xmm3, XMMWORD PTR [%5+%8]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm4, xmm1
|
||||
lddqu xmm1, XMMWORD PTR [%6+%8]
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm6, xmm3
|
||||
|
||||
%if %1==0 || %1==1
|
||||
lea %2, [%2+%7*2]
|
||||
lea %3, [%3+%8*2]
|
||||
|
||||
lea %4, [%4+%8*2]
|
||||
lea %5, [%5+%8*2]
|
||||
|
||||
lea %6, [%6+%8*2]
|
||||
%endif
|
||||
psadbw xmm1, xmm0
|
||||
paddw xmm7, xmm1
|
||||
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_8X2X4 8
|
||||
%if %1==0
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm4, QWORD PTR [%3]
|
||||
movq mm5, QWORD PTR [%4]
|
||||
movq mm6, QWORD PTR [%5]
|
||||
movq mm7, QWORD PTR [%6]
|
||||
|
||||
psadbw mm4, mm0
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
psadbw mm7, mm0
|
||||
%else
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm1, QWORD PTR [%3]
|
||||
movq mm2, QWORD PTR [%4]
|
||||
movq mm3, QWORD PTR [%5]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm4, mm1
|
||||
movq mm1, QWORD PTR [%6]
|
||||
paddw mm5, mm2
|
||||
paddw mm6, mm3
|
||||
|
||||
psadbw mm1, mm0
|
||||
paddw mm7, mm1
|
||||
%endif
|
||||
movq mm0, QWORD PTR [%2+%7]
|
||||
movq mm1, QWORD PTR [%3+%8]
|
||||
movq mm2, QWORD PTR [%4+%8]
|
||||
movq mm3, QWORD PTR [%5+%8]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm4, mm1
|
||||
movq mm1, QWORD PTR [%6+%8]
|
||||
paddw mm5, mm2
|
||||
paddw mm6, mm3
|
||||
|
||||
%if %1==0 || %1==1
|
||||
lea %2, [%2+%7*2]
|
||||
lea %3, [%3+%8*2]
|
||||
|
||||
lea %4, [%4+%8*2]
|
||||
lea %5, [%5+%8*2]
|
||||
|
||||
lea %6, [%6+%8*2]
|
||||
%endif
|
||||
psadbw mm1, mm0
|
||||
paddw mm7, mm1
|
||||
|
||||
%endmacro
|
||||
|
||||
;void vp8_sad16x16x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+8], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
;void vp8_sad16x8x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+8], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
;void vp8_sad8x16x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
punpckldq mm5, mm6
|
||||
|
||||
movq [rcx], mm5
|
||||
movd [rcx+8], mm7
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Writes three 32-bit values through `results`.
; The body is four PROCESS_8X2X3 invocations followed by a pack-and-store of
; mm5/mm6/mm7. PROCESS_8X2X3 and STACK_FRAME_{CREATE,DESTROY}_X3 are defined
; outside this excerpt; presumably each invocation handles two rows and leaves
; the running sums in mm5, mm6 and mm7 -- confirm against the macro.
;void vp8_sad8x8x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; First macro argument is 0 on the first call, 1 on the interior calls and
; 2 on the last call; its meaning is set by the macro definition (not shown).
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr ; rcx = output pointer
|
||||
|
||||
punpckldq mm5, mm6 ; mm5 = { low dword of mm5, low dword of mm6 }
|
||||
|
||||
movq [rcx], mm5 ; results[0] and results[1]
|
||||
movd [rcx+8], mm7 ; results[2]
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
;void vp8_sad4x4x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x3_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm1, DWORD PTR [ref_ptr]
|
||||
|
||||
movd mm2, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm3, DWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
movd mm4, DWORD PTR [ref_ptr+1]
|
||||
movd mm5, DWORD PTR [ref_ptr+2]
|
||||
|
||||
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
||||
movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
|
||||
|
||||
psadbw mm1, mm0
|
||||
|
||||
punpcklbw mm4, mm2
|
||||
punpcklbw mm5, mm3
|
||||
|
||||
psadbw mm4, mm0
|
||||
psadbw mm5, mm0
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm2, DWORD PTR [ref_ptr]
|
||||
|
||||
movd mm3, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm6, DWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm3
|
||||
punpcklbw mm2, mm6
|
||||
|
||||
movd mm3, DWORD PTR [ref_ptr+1]
|
||||
movd mm7, DWORD PTR [ref_ptr+2]
|
||||
|
||||
psadbw mm2, mm0
|
||||
|
||||
paddw mm1, mm2
|
||||
|
||||
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
||||
movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
|
||||
|
||||
punpcklbw mm3, mm2
|
||||
punpcklbw mm7, mm6
|
||||
|
||||
psadbw mm3, mm0
|
||||
psadbw mm7, mm0
|
||||
|
||||
paddw mm3, mm4
|
||||
paddw mm7, mm5
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
punpckldq mm1, mm3
|
||||
|
||||
movq [rcx], mm1
|
||||
movd [rcx+8], mm7
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Sum of absolute differences between a 16x16 source block and a 16x16
; reference block, returned in rax.
; The loop runs 4 times and consumes 4 rows of 16 bytes per pass.
; Source rows are read with movdqa, so each source row address must be
; 16-byte aligned; reference rows are read with movdqu and may be unaligned.
; `max_sad` is not referenced anywhere in the visible body.
; src_ptr/ref_ptr/src_stride/ref_stride/end_ptr are names set up outside this
; excerpt (presumably by STACK_FRAME_CREATE_X3 -- confirm).
;unsigned int vp8_sad16x16_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
;%define lddqu movdqu
|
||||
global sym(vp8_sad16x16_sse3) PRIVATE
|
||||
sym(vp8_sad16x16_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
mov end_ptr, 4 ; end_ptr is used as the loop counter here
|
||||
pxor xmm7, xmm7 ; xmm7 = running SAD accumulator
|
||||
|
||||
.vp8_sad16x16_sse3_loop:
|
||||
movdqa xmm0, XMMWORD PTR [src_ptr] ; rows 0 and 1 of this pass
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movdqa xmm4, XMMWORD PTR [src_ptr] ; rows 2 and 3 of this pass
|
||||
movdqu xmm5, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
|
||||
|
||||
psadbw xmm0, xmm1 ; xmm1 is free after this, so it is reused below
|
||||
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw xmm2, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
psadbw xmm6, xmm1
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw xmm7, xmm0 ; psadbw leaves one partial sum per 64-bit half
|
||||
paddw xmm7, xmm2
|
||||
paddw xmm7, xmm4
|
||||
paddw xmm7, xmm6
|
||||
|
||||
sub end_ptr, 1
|
||||
jne .vp8_sad16x16_sse3_loop
|
||||
|
||||
movq xmm0, xmm7 ; low-half partial sum
|
||||
psrldq xmm7, 8 ; bring the high-half partial sum down
|
||||
paddw xmm0, xmm7 ; total SAD in the low word
|
||||
movq rax, xmm0 ; return value
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Copies `height` rows of 32 bytes from the source to the destination.
; Rows are copied four at a time, then one at a time for any remainder.
; Loads use movdqu (source may be unaligned); stores use movdqa, so every
; destination row address must be 16-byte aligned.
; The body addresses the destination through the names ref_ptr/ref_stride,
; and uses end_ptr as a scratch row pointer; these names and `height` are set
; up outside this excerpt (presumably by STACK_FRAME_CREATE_X3 -- confirm).
; NOTE(review): the four-row loop body runs once before `height` is tested,
; so a call with height < 4 would still copy four rows -- confirm that callers
; always pass height >= 4.
;void vp8_copy32xn_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
.block_copy_sse3_loopx4:
|
||||
lea end_ptr, [src_ptr+src_stride*2] ; end_ptr = source row 2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
||||
movdqu xmm4, XMMWORD PTR [end_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
||||
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
||||
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*4]
|
||||
|
||||
lea end_ptr, [ref_ptr+ref_stride*2] ; end_ptr = destination row 2
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
||||
movdqa XMMWORD PTR [end_ptr], xmm4
|
||||
movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
||||
|
||||
lea ref_ptr, [ref_ptr+ref_stride*4]
|
||||
|
||||
sub height, 4
|
||||
cmp height, 4 ; repeat while at least four rows remain
|
||||
jge .block_copy_sse3_loopx4
|
||||
|
||||
;Check whether any rows remain to be copied.
|
||||
cmp height, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse3_loop:
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
lea src_ptr, [src_ptr+src_stride]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
lea ref_ptr, [ref_ptr+ref_stride]
|
||||
|
||||
sub height, 1
|
||||
jne .block_copy_sse3_loop
|
||||
|
||||
.copy_is_done:
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
;void vp8_sad16x16x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
movq xmm0, xmm4
|
||||
psrldq xmm4, 8
|
||||
|
||||
paddw xmm0, xmm4
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+8], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+12], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
;void vp8_sad16x8x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
movq xmm0, xmm4
|
||||
psrldq xmm4, 8
|
||||
|
||||
paddw xmm0, xmm4
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+8], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+12], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
;void vp8_sad8x16x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
punpckldq mm4, mm5
|
||||
punpckldq mm6, mm7
|
||||
|
||||
movq [rcx], mm4
|
||||
movq [rcx+8], mm6
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
; Writes four 32-bit values through `results`.
; The body is four PROCESS_8X2X4 invocations over one source pointer and four
; reference pointers (r0_ptr..r3_ptr), followed by a pack-and-store of
; mm4..mm7. PROCESS_8X2X4 and STACK_FRAME_{CREATE,DESTROY}_X4 are defined
; outside this excerpt; presumably each invocation handles two rows and leaves
; the four running sums in mm4, mm5, mm6 and mm7 -- confirm against the macro.
; NOTE(review): the prototype below shows a single `ref_ptr`, while the sibling
; 16x16x4d prototype names it `ref_ptr_base`; confirm what the third argument
; actually is.
;void vp8_sad8x8x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; First macro argument is 0 on the first call, 1 on the interior calls and
; 2 on the last call; its meaning is set by the macro definition (not shown).
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
; 32-bit builds only: rbp is popped before result_ptr is read; presumably
; STACK_FRAME_CREATE_X4 pushed it to gain a register -- confirm.
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr ; rcx = output pointer
|
||||
|
||||
punpckldq mm4, mm5 ; mm4 = { low dword of mm4, low dword of mm5 }
|
||||
punpckldq mm6, mm7 ; mm6 = { low dword of mm6, low dword of mm7 }
|
||||
|
||||
movq [rcx], mm4 ; results[0] and results[1]
|
||||
movq [rcx+8], mm6 ; results[2] and results[3]
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
;void vp8_sad4x4x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x4d_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm1, DWORD PTR [r0_ptr]
|
||||
|
||||
movd mm2, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm3, DWORD PTR [r0_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
movd mm4, DWORD PTR [r1_ptr]
|
||||
movd mm5, DWORD PTR [r2_ptr]
|
||||
|
||||
movd mm6, DWORD PTR [r3_ptr]
|
||||
movd mm2, DWORD PTR [r1_ptr+ref_stride]
|
||||
|
||||
movd mm3, DWORD PTR [r2_ptr+ref_stride]
|
||||
movd mm7, DWORD PTR [r3_ptr+ref_stride]
|
||||
|
||||
psadbw mm1, mm0
|
||||
|
||||
punpcklbw mm4, mm2
|
||||
punpcklbw mm5, mm3
|
||||
|
||||
punpcklbw mm6, mm7
|
||||
psadbw mm4, mm0
|
||||
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
|
||||
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea r0_ptr, [r0_ptr+ref_stride*2]
|
||||
|
||||
lea r1_ptr, [r1_ptr+ref_stride*2]
|
||||
lea r2_ptr, [r2_ptr+ref_stride*2]
|
||||
|
||||
lea r3_ptr, [r3_ptr+ref_stride*2]
|
||||
|
||||
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm2, DWORD PTR [r0_ptr]
|
||||
|
||||
movd mm3, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm7, DWORD PTR [r0_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm3
|
||||
punpcklbw mm2, mm7
|
||||
|
||||
movd mm3, DWORD PTR [r1_ptr]
|
||||
movd mm7, DWORD PTR [r2_ptr]
|
||||
|
||||
psadbw mm2, mm0
|
||||
%if ABI_IS_32BIT
|
||||
mov rax, rbp
|
||||
|
||||
pop rbp
|
||||
%define ref_stride rax
|
||||
%endif
|
||||
mov rsi, result_ptr
|
||||
|
||||
paddw mm1, mm2
|
||||
movd [rsi], mm1
|
||||
|
||||
movd mm2, DWORD PTR [r1_ptr+ref_stride]
|
||||
movd mm1, DWORD PTR [r2_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm3, mm2
|
||||
punpcklbw mm7, mm1
|
||||
|
||||
psadbw mm3, mm0
|
||||
psadbw mm7, mm0
|
||||
|
||||
movd mm2, DWORD PTR [r3_ptr]
|
||||
movd mm1, DWORD PTR [r3_ptr+ref_stride]
|
||||
|
||||
paddw mm3, mm4
|
||||
paddw mm7, mm5
|
||||
|
||||
movd [rsi+4], mm3
|
||||
punpcklbw mm2, mm1
|
||||
|
||||
movd [rsi+8], mm7
|
||||
psadbw mm2, mm0
|
||||
|
||||
paddw mm2, mm6
|
||||
movd [rsi+12], mm2
|
||||
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
@@ -1,353 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro PROCESS_16X2X8 1
|
||||
%if %1
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm1, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm1, xmm3
|
||||
paddw xmm1, xmm4
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movdqa xmm0, XMMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
movq xmm2, MMWORD PTR [rdi+ rdx+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_8X2X8 1
|
||||
%if %1
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm1, xmm2
|
||||
%else
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movq xmm0, MMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
; PROCESS_4X2X8 <init>
; Handles two rows of a 4-pixel-wide block and keeps eight 16-bit sums in xmm1.
; Register contract, as used by the body:
;   rsi = source row pointer, rax = source stride
;   rdi = reference row pointer, rdx = reference stride
;   xmm1 = eight running sums; xmm0, xmm3 and xmm5 are scratch
; With a non-zero argument the first row's result initialises xmm1; with a
; zero argument it is added to xmm1. The second row is always added.
; Both pointers are advanced by two rows.
; mpsadbw with immediate 0x0 compares the low 4 source bytes against the eight
; 4-byte windows starting at reference byte offsets 0..7.
%macro PROCESS_4X2X8 1
|
||||
%if %1
|
||||
movd xmm0, [rsi] ; 4 source bytes
|
||||
movq xmm1, MMWORD PTR [rdi] ; reference bytes 0..7
|
||||
movq xmm3, MMWORD PTR [rdi+8] ; reference bytes 8..15
|
||||
punpcklqdq xmm1, xmm3 ; xmm1 = 16 contiguous reference bytes
|
||||
|
||||
mpsadbw xmm1, xmm0, 0x0 ; initialise the eight sums
|
||||
%else
|
||||
movd xmm0, [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5 ; accumulate into the running sums
|
||||
%endif
|
||||
movd xmm0, [rsi + rax] ; second row: source
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx] ; second row: reference bytes 0..7
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8] ; second row: reference bytes 8..15
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2] ; advance both pointers by two rows
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_sad16x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array);
|
||||
global sym(vp8_sad16x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad16x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad16x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; Computes eight sums of absolute differences for a 4x4 source block against
; eight reference positions at consecutive byte offsets 0..7, and stores them
; as eight 16-bit values at sad_array.
; The store uses movdqa, so sad_array must be 16-byte aligned.
; Four rows are covered by two PROCESS_4X2X8 invocations (two rows each).
;void vp8_sad4x4x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad4x4x8_sse4) PRIVATE
|
||||
sym(vp8_sad4x4x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; Argument 1 initialises the sums in xmm1; argument 0 accumulates into them.
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1 ; aligned 16-byte store of the eight sums
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -13,393 +13,6 @@
|
||||
|
||||
%define xmm_filter_shift 7
|
||||
|
||||
; Returns the sum of squares of 256 signed 16-bit values starting at src_ptr.
; The loop runs 8 times and consumes 64 bytes (32 values) per pass.
; Loads use movdqa, so src_ptr must be 16-byte aligned.
; Only the low 32 bits of rax hold the result; the upper bits of the 64-bit
; move contain a leftover partial sum.
;unsigned int vp8_get_mb_ss_sse2
|
||||
;(
|
||||
; short *src_ptr
|
||||
;)
|
||||
global sym(vp8_get_mb_ss_sse2) PRIVATE
|
||||
sym(vp8_get_mb_ss_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 1
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rax, arg(0) ;[src_ptr]
|
||||
mov rcx, 8 ; loop counter: 8 passes of 64 bytes
|
||||
pxor xmm4, xmm4 ; xmm4 = four 32-bit partial sums
|
||||
|
||||
.NEXTROW:
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm1, [rax+16]
|
||||
movdqa xmm2, [rax+32]
|
||||
movdqa xmm3, [rax+48]
|
||||
pmaddwd xmm0, xmm0 ; square each word and add adjacent pairs
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
pmaddwd xmm3, xmm3
|
||||
|
||||
paddd xmm0, xmm1
|
||||
paddd xmm2, xmm3
|
||||
paddd xmm4, xmm0
|
||||
paddd xmm4, xmm2
|
||||
|
||||
add rax, 0x40
|
||||
dec rcx
|
||||
jnz .NEXTROW ; dec does not write CF, so test ZF only
|
||||
|
||||
movdqa xmm3,xmm4 ; horizontal add of the four partial sums
|
||||
psrldq xmm4,8
|
||||
paddd xmm4,xmm3
|
||||
movdqa xmm3,xmm4
|
||||
psrldq xmm4,4
|
||||
paddd xmm4,xmm3
|
||||
movq rax,xmm4 ; total is in the low dword
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vp8_get16x16var_sse2
|
||||
;(
|
||||
; unsigned char * src_ptr,
|
||||
; int source_stride,
|
||||
; unsigned char * ref_ptr,
|
||||
; int recon_stride,
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get16x16var_sse2) PRIVATE
|
||||
sym(vp8_get16x16var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;[src_ptr]
|
||||
mov rdi, arg(2) ;[ref_ptr]
|
||||
|
||||
movsxd rax, DWORD PTR arg(1) ;[source_stride]
|
||||
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
|
||||
|
||||
; Prefetch data
|
||||
lea rcx, [rax+rax*2]
|
||||
prefetcht0 [rsi]
|
||||
prefetcht0 [rsi+rax]
|
||||
prefetcht0 [rsi+rax*2]
|
||||
prefetcht0 [rsi+rcx]
|
||||
lea rbx, [rsi+rax*4]
|
||||
prefetcht0 [rbx]
|
||||
prefetcht0 [rbx+rax]
|
||||
prefetcht0 [rbx+rax*2]
|
||||
prefetcht0 [rbx+rcx]
|
||||
|
||||
lea rcx, [rdx+rdx*2]
|
||||
prefetcht0 [rdi]
|
||||
prefetcht0 [rdi+rdx]
|
||||
prefetcht0 [rdi+rdx*2]
|
||||
prefetcht0 [rdi+rcx]
|
||||
lea rbx, [rdi+rdx*4]
|
||||
prefetcht0 [rbx]
|
||||
prefetcht0 [rbx+rdx]
|
||||
prefetcht0 [rbx+rdx*2]
|
||||
prefetcht0 [rbx+rcx]
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
|
||||
|
||||
pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
|
||||
mov rcx, 16
|
||||
|
||||
.var16loop:
|
||||
movdqu xmm1, XMMWORD PTR [rsi]
|
||||
movdqu xmm2, XMMWORD PTR [rdi]
|
||||
|
||||
prefetcht0 [rsi+rax*8]
|
||||
prefetcht0 [rdi+rdx*8]
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm2
|
||||
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
punpckhbw xmm3, xmm0
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
|
||||
psubw xmm1, xmm2
|
||||
psubw xmm3, xmm4
|
||||
|
||||
paddw xmm7, xmm1
|
||||
pmaddwd xmm1, xmm1
|
||||
|
||||
paddw xmm7, xmm3
|
||||
pmaddwd xmm3, xmm3
|
||||
|
||||
paddd xmm6, xmm1
|
||||
paddd xmm6, xmm3
|
||||
|
||||
add rsi, rax
|
||||
add rdi, rdx
|
||||
|
||||
sub rcx, 1
|
||||
jnz .var16loop
|
||||
|
||||
|
||||
movdqa xmm1, xmm6
|
||||
pxor xmm6, xmm6
|
||||
|
||||
pxor xmm5, xmm5
|
||||
punpcklwd xmm6, xmm7
|
||||
|
||||
punpckhwd xmm5, xmm7
|
||||
psrad xmm5, 16
|
||||
|
||||
psrad xmm6, 16
|
||||
paddd xmm6, xmm5
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpckldq xmm1, xmm0
|
||||
|
||||
punpckhdq xmm2, xmm0
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
paddd xmm1, xmm2
|
||||
punpckldq xmm6, xmm0
|
||||
|
||||
punpckhdq xmm7, xmm0
|
||||
paddd xmm6, xmm7
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddd xmm7, xmm6
|
||||
paddd xmm1, xmm2
|
||||
|
||||
mov rax, arg(5) ;[Sum]
|
||||
mov rdi, arg(4) ;[SSE]
|
||||
|
||||
movd DWORD PTR [rax], xmm7
|
||||
movd DWORD PTR [rdi], xmm1
|
||||
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
;unsigned int vp8_get8x8var_sse2
|
||||
;(
|
||||
; unsigned char * src_ptr,
|
||||
; int source_stride,
|
||||
; unsigned char * ref_ptr,
|
||||
; int recon_stride,
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get8x8var_sse2) PRIVATE
|
||||
sym(vp8_get8x8var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;[src_ptr]
|
||||
mov rdi, arg(2) ;[ref_ptr]
|
||||
|
||||
movsxd rax, DWORD PTR arg(1) ;[source_stride]
|
||||
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
|
||||
|
||||
movq xmm1, QWORD PTR [rsi]
|
||||
movq xmm2, QWORD PTR [rdi]
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
punpcklbw xmm2, xmm0
|
||||
|
||||
psubsw xmm1, xmm2
|
||||
paddw xmm7, xmm1
|
||||
|
||||
pmaddwd xmm1, xmm1
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax * 2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx * 2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax *2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx *2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax *2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx *2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpcklwd xmm6, xmm0
|
||||
|
||||
punpckhwd xmm7, xmm0
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
paddw xmm6, xmm7
|
||||
punpckldq xmm1, xmm0
|
||||
|
||||
punpckhdq xmm2, xmm0
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
paddd xmm1, xmm2
|
||||
punpckldq xmm6, xmm0
|
||||
|
||||
punpckhdq xmm7, xmm0
|
||||
paddw xmm6, xmm7
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm7, xmm6
|
||||
paddd xmm1, xmm2
|
||||
|
||||
mov rax, arg(5) ;[Sum]
|
||||
mov rdi, arg(4) ;[SSE]
|
||||
|
||||
movq rdx, xmm7
|
||||
movsx rcx, dx
|
||||
|
||||
mov dword ptr [rax], ecx
|
||||
movd DWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_filter_block2d_bil_var_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
|
||||
@@ -8,19 +8,11 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern unsigned int vp8_get16x16var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
|
||||
@@ -127,7 +127,7 @@ void vp8_sixtap_predict4x4_mmx
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
@@ -148,7 +148,7 @@ void vp8_sixtap_predict16x16_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -180,7 +180,7 @@ void vp8_sixtap_predict8x8_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -206,7 +206,7 @@ void vp8_sixtap_predict8x4_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -252,7 +252,7 @@ void vp8_sixtap_predict16x16_sse2
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -292,7 +292,7 @@ void vp8_sixtap_predict8x8_sse2
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
@@ -330,7 +330,7 @@ void vp8_sixtap_predict8x4_sse2
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
@@ -432,7 +432,7 @@ void vp8_sixtap_predict16x16_ssse3
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -480,7 +480,7 @@ void vp8_sixtap_predict8x8_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -528,7 +528,7 @@ void vp8_sixtap_predict8x4_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -576,7 +576,7 @@ void vp8_sixtap_predict4x4_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,353 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%define mmx_filter_shift 7
|
||||
|
||||
; Bilinear sub-pixel filter plus difference statistics for a block that is
; 4 pixels wide and 4 rows high (MMX).
;
; Every reference row is filtered horizontally with the two HFilter taps
; (pixel at x and pixel at x+1), rounded with mmx_bi_rd and shifted right by
; mmx_filter_shift. Each pair of consecutive filtered rows is then blended
; with the two VFilter taps, rounded and shifted the same way. The result is
; compared with the matching src_ptr row: the differences are totalled into
; *sum and their squares into *sumsquared.
;
; Reads 5 bytes from each of 5 reference rows and 4 bytes from each of
; 4 source rows.
; NOTE(review): each tap is fetched as 8 bytes ([HFilter], [HFilter+8], and
; likewise for VFilter), so the filter tables presumably hold every tap as
; four identical 16-bit words -- confirm against the callers.
;void vp8_filter_block2d_bil4x4_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil4x4_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
|
||||
pxor mm6, mm6 ; mm6 = 0: four word lanes accumulating the differences
|
||||
pxor mm7, mm7 ; mm7 = 0: two dword lanes accumulating the squared differences
|
||||
|
||||
mov rax, arg(4) ;HFilter ;
|
||||
mov rdx, arg(5) ;VFilter ;
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
|
||||
mov rcx, 4 ; rcx = row counter (4 rows)
|
||||
pxor mm0, mm0 ; mm0 = 0, used to zero-extend bytes to words
|
||||
|
||||
movd mm1, [rsi] ; first reference row: 4 pixels at x
|
||||
movd mm3, [rsi+1] ; first reference row: 4 pixels at x+1
|
||||
|
||||
punpcklbw mm1, mm0 ; bytes -> words
|
||||
pmullw mm1, [rax] ; * first horizontal tap
|
||||
|
||||
punpcklbw mm3, mm0 ; bytes -> words
|
||||
pmullw mm3, [rax+8] ; * second horizontal tap
|
||||
|
||||
paddw mm1, mm3 ; sum of the two weighted pixels
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
|
||||
psraw mm1, mmx_filter_shift ; scale back down by mmx_filter_shift bits
|
||||
movq mm5, mm1 ; mm5 = horizontally filtered first row, kept for the vertical pass
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil4x4_var_mmx_loop:
|
||||
|
||||
movd mm1, [rsi] ; current reference row: 4 pixels at x
|
||||
movd mm3, [rsi+1] ; current reference row: 4 pixels at x+1
|
||||
|
||||
punpcklbw mm1, mm0 ; bytes -> words
|
||||
pmullw mm1, [rax] ; * first horizontal tap
|
||||
|
||||
punpcklbw mm3, mm0 ; bytes -> words
|
||||
pmullw mm3, [rax+8] ; * second horizontal tap
|
||||
|
||||
paddw mm1, mm3 ; sum of the two weighted pixels
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
|
||||
psraw mm1, mmx_filter_shift ; mm1 = horizontally filtered current row
|
||||
movq mm3, mm5 ; mm3 = filtered row from the previous iteration
|
||||
|
||||
movq mm5, mm1 ; keep the current filtered row for the next iteration
|
||||
pmullw mm3, [rdx] ; previous row * first vertical tap
|
||||
|
||||
pmullw mm1, [rdx+8] ; current row * second vertical tap
|
||||
paddw mm1, mm3 ; sum of the two weighted rows
|
||||
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
psraw mm1, mmx_filter_shift ; mm1 = fully filtered prediction for this row
|
||||
|
||||
movd mm3, [rdi] ; 4 source pixels
|
||||
punpcklbw mm3, mm0 ; bytes -> words
|
||||
|
||||
psubw mm1, mm3 ; difference = filtered reference - source
|
||||
paddw mm6, mm1 ; accumulate the differences (wrapping 16-bit adds)
|
||||
|
||||
pmaddwd mm1, mm1 ; square each difference and add adjacent pairs -> 2 dwords
|
||||
paddd mm7, mm1 ; accumulate the squared differences
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ; one more row done
|
||||
jnz .filter_block2d_bil4x4_var_mmx_loop ; repeat until the row counter reaches zero
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ; low two difference lanes moved into the upper halves of two dwords
|
||||
punpckhwd mm3, mm6 ; high two difference lanes, same layout
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ; bring the upper dword down
|
||||
paddd mm2, mm6 ; low dword = (sum of all four lanes) << 16, modulo 2^32
|
||||
|
||||
psrad mm2, 16 ; arithmetic shift: total as a sign-extended 16-bit value
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ; bring the upper dword down
|
||||
paddd mm4, mm7 ; low dword = total of the squared differences
|
||||
|
||||
mov rdi, arg(6) ;sum
|
||||
mov rsi, arg(7) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ; *sum = total of the differences
|
||||
movd dword ptr [rsi], mm4 ; *sumsquared = total of the squared differences
|
||||
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
; Bilinear sub-pixel filter plus difference statistics for a block that is
; 8 pixels wide and Height rows high (MMX).
;
; Every reference row is filtered horizontally with the two HFilter taps
; (pixel at x and pixel at x+1), rounded with mmx_bi_rd and shifted right by
; mmx_filter_shift; the 8 results are processed as two groups of four words
; (mm1 = low half, mm2 = high half). The filtered row is packed back to bytes
; in mm5 so that the next iteration can blend it with the following row using
; the two VFilter taps. The blended row is compared with the matching src_ptr
; row: the differences are totalled into *sum and their squares into
; *sumsquared.
;
; Reads 9 bytes from each of Height+1 reference rows and 8 bytes from each of
; Height source rows. The loop body runs before the counter is tested, so
; Height must be at least 1.
; NOTE(review): each tap is fetched as 8 bytes ([HFilter], [HFilter+8], and
; likewise for VFilter), so the filter tables presumably hold every tap as
; four identical 16-bit words -- confirm against the callers.
; NOTE(review): the differences are accumulated with wrapping 16-bit adds and
; the final total is reduced to a signed 16-bit value before sign extension;
; confirm that the Height values used by callers cannot overflow that range.
;void vp8_filter_block2d_bil_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
pxor mm6, mm6 ; mm6 = 0: four word lanes accumulating the differences
|
||||
pxor mm7, mm7 ; mm7 = 0: two dword lanes accumulating the squared differences
|
||||
mov rax, arg(5) ;HFilter ;
|
||||
|
||||
mov rdx, arg(6) ;VFilter ;
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
|
||||
pxor mm0, mm0 ; mm0 = 0, used to zero-extend bytes to words
|
||||
movq mm1, [rsi] ; first reference row: 8 pixels at x
|
||||
|
||||
movq mm3, [rsi+1] ; first reference row: 8 pixels at x+1
|
||||
movq mm2, mm1 ; copy so the high four pixels can be unpacked separately
|
||||
|
||||
movq mm4, mm3 ; copy so the high four pixels can be unpacked separately
|
||||
punpcklbw mm1, mm0 ; low four pixels at x -> words
|
||||
|
||||
punpckhbw mm2, mm0 ; high four pixels at x -> words
|
||||
pmullw mm1, [rax] ; * first horizontal tap
|
||||
|
||||
pmullw mm2, [rax] ; * first horizontal tap
|
||||
punpcklbw mm3, mm0 ; low four pixels at x+1 -> words
|
||||
|
||||
punpckhbw mm4, mm0 ; high four pixels at x+1 -> words
|
||||
pmullw mm3, [rax+8] ; * second horizontal tap
|
||||
|
||||
pmullw mm4, [rax+8] ; * second horizontal tap
|
||||
paddw mm1, mm3 ; low half: sum of the two weighted pixels
|
||||
|
||||
paddw mm2, mm4 ; high half: sum of the two weighted pixels
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
|
||||
psraw mm1, mmx_filter_shift ; scale back down by mmx_filter_shift bits
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
|
||||
psraw mm2, mmx_filter_shift ; scale back down by mmx_filter_shift bits
|
||||
movq mm5, mm1 ; start building the packed copy of the filtered first row
|
||||
|
||||
packuswb mm5, mm2 ; mm5 = filtered first row as 8 bytes (unsigned saturation)
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil_var_mmx_loop:
|
||||
|
||||
movq mm1, [rsi] ; current reference row: 8 pixels at x
|
||||
movq mm3, [rsi+1] ; current reference row: 8 pixels at x+1
|
||||
|
||||
movq mm2, mm1 ; copy for the high half
|
||||
movq mm4, mm3 ; copy for the high half
|
||||
|
||||
punpcklbw mm1, mm0 ; low four pixels at x -> words
|
||||
punpckhbw mm2, mm0 ; high four pixels at x -> words
|
||||
|
||||
pmullw mm1, [rax] ; * first horizontal tap
|
||||
pmullw mm2, [rax] ; * first horizontal tap
|
||||
|
||||
punpcklbw mm3, mm0 ; low four pixels at x+1 -> words
|
||||
punpckhbw mm4, mm0 ; high four pixels at x+1 -> words
|
||||
|
||||
pmullw mm3, [rax+8] ; * second horizontal tap
|
||||
pmullw mm4, [rax+8] ; * second horizontal tap
|
||||
|
||||
paddw mm1, mm3 ; low half: sum of the two weighted pixels
|
||||
paddw mm2, mm4 ; high half: sum of the two weighted pixels
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
psraw mm1, mmx_filter_shift ; mm1 = filtered current row, low half
|
||||
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
psraw mm2, mmx_filter_shift ; mm2 = filtered current row, high half
|
||||
|
||||
movq mm3, mm5 ; packed filtered row from the previous iteration
|
||||
movq mm4, mm5 ; second copy for the high half
|
||||
|
||||
punpcklbw mm3, mm0 ; previous row, low half -> words
|
||||
punpckhbw mm4, mm0 ; previous row, high half -> words
|
||||
|
||||
movq mm5, mm1 ; start packing the current filtered row
|
||||
packuswb mm5, mm2 ; mm5 = current filtered row as 8 bytes, kept for the next iteration
|
||||
|
||||
pmullw mm3, [rdx] ; previous row * first vertical tap
|
||||
pmullw mm4, [rdx] ; previous row * first vertical tap
|
||||
|
||||
pmullw mm1, [rdx+8] ; current row * second vertical tap
|
||||
pmullw mm2, [rdx+8] ; current row * second vertical tap
|
||||
|
||||
paddw mm1, mm3 ; low half: sum of the two weighted rows
|
||||
paddw mm2, mm4 ; high half: sum of the two weighted rows
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ; add the rounding constant
|
||||
|
||||
psraw mm1, mmx_filter_shift ; mm1 = fully filtered prediction, low half
|
||||
psraw mm2, mmx_filter_shift ; mm2 = fully filtered prediction, high half
|
||||
|
||||
movq mm3, [rdi] ; 8 source pixels
|
||||
movq mm4, mm3 ; copy for the high half
|
||||
|
||||
punpcklbw mm3, mm0 ; source low half -> words
|
||||
punpckhbw mm4, mm0 ; source high half -> words
|
||||
|
||||
psubw mm1, mm3 ; difference = filtered reference - source (low half)
|
||||
psubw mm2, mm4 ; difference = filtered reference - source (high half)
|
||||
|
||||
paddw mm6, mm1 ; accumulate the differences (wrapping 16-bit adds)
|
||||
pmaddwd mm1, mm1 ; square each difference and add adjacent pairs -> 2 dwords
|
||||
|
||||
paddw mm6, mm2 ; accumulate the differences (wrapping 16-bit adds)
|
||||
pmaddwd mm2, mm2 ; square each difference and add adjacent pairs -> 2 dwords
|
||||
|
||||
paddd mm7, mm1 ; accumulate the squared differences
|
||||
paddd mm7, mm2 ; accumulate the squared differences
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ; one more row done
|
||||
jnz .filter_block2d_bil_var_mmx_loop ; repeat until the row counter reaches zero
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ; low two difference lanes moved into the upper halves of two dwords
|
||||
punpckhwd mm3, mm6 ; high two difference lanes, same layout
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ; bring the upper dword down
|
||||
paddd mm2, mm6 ; low dword = (sum of all four lanes) << 16, modulo 2^32
|
||||
|
||||
psrad mm2, 16 ; arithmetic shift: total as a sign-extended 16-bit value
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ; bring the upper dword down
|
||||
paddd mm4, mm7 ; low dword = total of the squared differences
|
||||
|
||||
mov rdi, arg(7) ;sum
|
||||
mov rsi, arg(8) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ; *sum = total of the differences
|
||||
movd dword ptr [rsi], mm4 ; *sumsquared = total of the squared differences
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
; Rounding constant added before each ">> mmx_filter_shift" in the bilinear
; filters above: 64 = 1 << (mmx_filter_shift - 1), one copy per 16-bit lane.
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
|
||||
align 16
|
||||
mmx_bi_rd:
|
||||
times 4 dw 64
|
||||
+1
-154
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx
|
||||
short *filter
|
||||
);
|
||||
|
||||
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
|
||||
extern unsigned int vp8_get8x8var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern unsigned int vp8_get4x4var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern void vp8_filter_block2d_bil4x4_var_mmx
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
|
||||
unsigned int vp8_variance4x4_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x8_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_mse16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
*sse = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance16x8_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance8x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_sub_pixel_variance4x4_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
+1
-155
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
extern unsigned int vp8_get4x4var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
|
||||
unsigned int vp8_get_mb_ss_sse2
|
||||
(
|
||||
const short *src_ptr
|
||||
);
|
||||
unsigned int vp8_get16x16var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
unsigned int vp8_get8x8var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
void vp8_filter_block2d_bil_var_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
unsigned int vp8_variance4x4_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0;
|
||||
int sum0;
|
||||
|
||||
|
||||
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
*sse = sse0;
|
||||
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
|
||||
}
|
||||
unsigned int vp8_mse16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
|
||||
unsigned int sse0;
|
||||
int sum0;
|
||||
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
*sse = sse0;
|
||||
return sse0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance4x4_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -378,20 +238,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -73,8 +73,8 @@ void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
else
|
||||
{
|
||||
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
|
||||
QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
|
||||
}
|
||||
QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
|
||||
}
|
||||
else
|
||||
QIndex = pc->base_qindex;
|
||||
@@ -101,6 +101,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
int i;
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
int corruption_detected = 0;
|
||||
#else
|
||||
(void)mb_idx;
|
||||
#endif
|
||||
|
||||
if (xd->mode_info_context->mbmi.mb_skip_coeff)
|
||||
@@ -140,7 +142,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
* Better to use the predictor as reconstruction.
|
||||
*/
|
||||
pbi->frame_corrupt_residual = 1;
|
||||
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
vp8_conceal_corrupt_mb(xd);
|
||||
|
||||
|
||||
@@ -149,7 +151,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
/* force idct to be skipped for B_PRED and use the
|
||||
* prediction only for reconstruction
|
||||
* */
|
||||
vpx_memset(xd->eobs, 0, 25);
|
||||
memset(xd->eobs, 0, 25);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -182,7 +184,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
|
||||
/* clear out residual eob info */
|
||||
if(xd->mode_info_context->mbmi.mb_skip_coeff)
|
||||
vpx_memset(xd->eobs, 0, 25);
|
||||
memset(xd->eobs, 0, 25);
|
||||
|
||||
intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
|
||||
|
||||
@@ -212,7 +214,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
(b->qcoeff[0] * DQC[0],
|
||||
dst, dst_stride,
|
||||
dst, dst_stride);
|
||||
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -249,14 +251,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
|
||||
vp8_short_inv_walsh4x4(&b->dqcoeff[0],
|
||||
xd->qcoeff);
|
||||
vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
|
||||
vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
|
||||
xd->qcoeff);
|
||||
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
|
||||
/* override the dc dequant constant in order to preserve the
|
||||
@@ -321,7 +323,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)Border; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
}
|
||||
|
||||
@@ -336,7 +338,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
}
|
||||
|
||||
@@ -349,7 +351,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
memcpy(dest_ptr1, src_ptr1, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
}
|
||||
}
|
||||
@@ -377,7 +379,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)Border; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
@@ -395,7 +397,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
@@ -409,7 +411,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
memcpy(dest_ptr2, src_ptr2, plane_stride);
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
}
|
||||
@@ -444,8 +446,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], Border);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], Border);
|
||||
memset(dest_ptr1, src_ptr1[0], Border);
|
||||
memset(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
@@ -468,8 +470,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], Border);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], Border);
|
||||
memset(dest_ptr1, src_ptr1[0], Border);
|
||||
memset(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
@@ -488,8 +490,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], Border);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], Border);
|
||||
memset(dest_ptr1, src_ptr1[0], Border);
|
||||
memset(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
@@ -566,7 +568,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
|
||||
|
||||
/* reset contexts */
|
||||
xd->above_context = pc->above_context;
|
||||
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
xd->left_available = 0;
|
||||
|
||||
@@ -916,19 +918,19 @@ static void init_frame(VP8D_COMP *pbi)
|
||||
if (pc->frame_type == KEY_FRAME)
|
||||
{
|
||||
/* Various keyframe initializations */
|
||||
vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
|
||||
vp8_init_mbmode_probs(pc);
|
||||
|
||||
vp8_default_coef_probs(pc);
|
||||
|
||||
/* reset the segment feature data to 0 with delta coding (Default state). */
|
||||
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
|
||||
memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
|
||||
xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
|
||||
|
||||
/* reset the ref and mode deltas for the loop filter */
|
||||
vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
|
||||
vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
|
||||
memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
|
||||
memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
|
||||
|
||||
/* All buffers are implicitly updated on key frames. */
|
||||
pc->refresh_golden_frame = 1;
|
||||
@@ -1067,12 +1069,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
pc->vert_scale = clear[6] >> 6;
|
||||
}
|
||||
data += 7;
|
||||
clear += 7;
|
||||
}
|
||||
else
|
||||
{
|
||||
vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
|
||||
vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
|
||||
memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
|
||||
memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
|
||||
}
|
||||
}
|
||||
if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME))
|
||||
@@ -1104,7 +1105,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
{
|
||||
xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc);
|
||||
|
||||
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
|
||||
memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
|
||||
|
||||
/* For each segmentation feature (Quant and loop filter level) */
|
||||
for (i = 0; i < MB_LVL_MAX; i++)
|
||||
@@ -1128,7 +1129,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
if (xd->update_mb_segmentation_map)
|
||||
{
|
||||
/* Which macro block level features are enabled */
|
||||
vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
|
||||
memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
|
||||
|
||||
/* Read the probs used to decode the segment id for each macro block. */
|
||||
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
|
||||
@@ -1277,7 +1278,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
#endif
|
||||
if (pc->refresh_entropy_probs == 0)
|
||||
{
|
||||
vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
|
||||
memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
|
||||
}
|
||||
|
||||
pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc);
|
||||
@@ -1326,7 +1327,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
}
|
||||
|
||||
/* clear out the coeff buffer */
|
||||
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
|
||||
vp8_decode_mode_mvs(pbi);
|
||||
|
||||
@@ -1340,7 +1341,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
}
|
||||
#endif
|
||||
|
||||
vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
|
||||
memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
|
||||
pbi->frame_corrupt_residual = 0;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
@@ -1379,7 +1380,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
|
||||
if (pc->refresh_entropy_probs == 0)
|
||||
{
|
||||
vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
|
||||
memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
|
||||
pbi->independent_partitions = prev_independent_partitions;
|
||||
}
|
||||
|
||||
|
||||
@@ -591,6 +591,8 @@ static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
|
||||
static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi,
|
||||
MB_MODE_INFO *mbmi)
|
||||
{
|
||||
(void)mbmi;
|
||||
|
||||
/* Read the Macroblock segmentation map if it is being updated explicitly
|
||||
* this frame (reset to 0 above by default)
|
||||
* By default on a key frame reset all MBs to segment 0
|
||||
|
||||
@@ -20,8 +20,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
|
||||
ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
|
||||
ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
|
||||
|
||||
vpx_memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
vpx_memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
|
||||
/* Clear entropy contexts for Y2 blocks */
|
||||
if (!x->mode_info_context->mbmi.is_4x4)
|
||||
|
||||
@@ -350,7 +350,7 @@ static void estimate_missing_mvs(MB_OVERLAP *overlaps,
|
||||
unsigned int first_corrupt)
|
||||
{
|
||||
int mb_row, mb_col;
|
||||
vpx_memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
|
||||
memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
|
||||
/* First calculate the overlaps for all blocks */
|
||||
for (mb_row = 0; mb_row < mb_rows; ++mb_row)
|
||||
{
|
||||
|
||||
@@ -58,7 +58,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
|
||||
if (!pbi)
|
||||
return NULL;
|
||||
|
||||
vpx_memset(pbi, 0, sizeof(VP8D_COMP));
|
||||
memset(pbi, 0, sizeof(VP8D_COMP));
|
||||
|
||||
if (setjmp(pbi->common.error.jmp))
|
||||
{
|
||||
@@ -87,6 +87,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
|
||||
pbi->ec_enabled = oxcf->error_concealment;
|
||||
pbi->overlaps = NULL;
|
||||
#else
|
||||
(void)oxcf;
|
||||
pbi->ec_enabled = 0;
|
||||
#endif
|
||||
/* Error concealment is activated after a key frame has been
|
||||
@@ -258,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
|
||||
return err;
|
||||
}
|
||||
|
||||
int check_fragments_for_errors(VP8D_COMP *pbi)
|
||||
static int check_fragments_for_errors(VP8D_COMP *pbi)
|
||||
{
|
||||
if (!pbi->ec_active &&
|
||||
pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)
|
||||
@@ -303,6 +304,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
|
||||
{
|
||||
VP8_COMMON *cm = &pbi->common;
|
||||
int retcode = -1;
|
||||
(void)size;
|
||||
(void)source;
|
||||
|
||||
pbi->common.error.error_code = VPX_CODEC_OK;
|
||||
|
||||
@@ -407,6 +410,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
|
||||
#if CONFIG_POSTPROC
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
|
||||
#else
|
||||
(void)flags;
|
||||
|
||||
if (pbi->common.frame_to_show)
|
||||
{
|
||||
|
||||
@@ -60,12 +60,12 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
|
||||
|
||||
mbd->segmentation_enabled = xd->segmentation_enabled;
|
||||
mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
|
||||
vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
|
||||
memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
|
||||
|
||||
/*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
|
||||
vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
|
||||
memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
|
||||
/*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
|
||||
vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
|
||||
memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
|
||||
/*unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;*/
|
||||
mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
|
||||
@@ -73,10 +73,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
|
||||
|
||||
mbd->current_bc = &pbi->mbc[0];
|
||||
|
||||
vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
|
||||
vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
|
||||
vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
|
||||
vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
|
||||
memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
|
||||
memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
|
||||
memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
|
||||
memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
|
||||
|
||||
mbd->fullpixel_mask = 0xffffffff;
|
||||
|
||||
@@ -96,6 +96,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
int i;
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
int corruption_detected = 0;
|
||||
#else
|
||||
(void)mb_idx;
|
||||
#endif
|
||||
|
||||
if (xd->mode_info_context->mbmi.mb_skip_coeff)
|
||||
@@ -135,7 +137,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
* Better to use the predictor as reconstruction.
|
||||
*/
|
||||
pbi->frame_corrupt_residual = 1;
|
||||
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
vp8_conceal_corrupt_mb(xd);
|
||||
|
||||
|
||||
@@ -144,7 +146,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
/* force idct to be skipped for B_PRED and use the
|
||||
* prediction only for reconstruction
|
||||
* */
|
||||
vpx_memset(xd->eobs, 0, 25);
|
||||
memset(xd->eobs, 0, 25);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -177,7 +179,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
|
||||
/* clear out residual eob info */
|
||||
if(xd->mode_info_context->mbmi.mb_skip_coeff)
|
||||
vpx_memset(xd->eobs, 0, 25);
|
||||
memset(xd->eobs, 0, 25);
|
||||
|
||||
intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
|
||||
|
||||
@@ -227,7 +229,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
{
|
||||
vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
|
||||
dst, dst_stride, dst, dst_stride);
|
||||
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -264,14 +266,14 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
|
||||
|
||||
vp8_short_inv_walsh4x4(&b->dqcoeff[0],
|
||||
xd->qcoeff);
|
||||
vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
|
||||
vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
|
||||
xd->qcoeff);
|
||||
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
|
||||
}
|
||||
|
||||
/* override the dc dequant constant in order to preserve the
|
||||
@@ -358,7 +360,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
|
||||
|
||||
/* reset contexts */
|
||||
xd->above_context = pc->above_context;
|
||||
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
xd->left_available = 0;
|
||||
|
||||
@@ -497,9 +499,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
|
||||
if( mb_row != pc->mb_rows-1 )
|
||||
{
|
||||
/* Save decoded MB last row data for next-row decoding */
|
||||
vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
|
||||
vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
|
||||
vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
|
||||
memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
|
||||
memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
|
||||
memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
|
||||
}
|
||||
|
||||
/* save left_col for next MB decoding */
|
||||
@@ -874,23 +876,23 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
if (filter_level)
|
||||
{
|
||||
/* Set above_row buffer to 127 for decoding first MB row */
|
||||
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
|
||||
vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
|
||||
vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
|
||||
memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
|
||||
memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
|
||||
memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
|
||||
|
||||
for (j=1; j<pc->mb_rows; j++)
|
||||
{
|
||||
vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
|
||||
memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
}
|
||||
|
||||
/* Set left_col to 129 initially */
|
||||
for (j=0; j<pc->mb_rows; j++)
|
||||
{
|
||||
vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
|
||||
vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
|
||||
memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
|
||||
memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
|
||||
memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
|
||||
}
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
|
||||
@@ -1,310 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_start_encode|
|
||||
EXPORT |vp8_encode_bool|
|
||||
EXPORT |vp8_stop_encode|
|
||||
EXPORT |vp8_encode_value|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end]
|
||||
ldr r3, [r0, #vp8_writer_error]
|
||||
mov r1, $pos
|
||||
mov r0, $start
|
||||
bl vp8_validate_buffer_arm
|
||||
pop {r0-r3, r12, lr}
|
||||
MEND
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 unsigned char *source
|
||||
; r2 unsigned char *source_end
|
||||
|vp8_start_encode| PROC
|
||||
str r2, [r0, #vp8_writer_buffer_end]
|
||||
mov r12, #0
|
||||
mov r3, #255
|
||||
mvn r2, #23
|
||||
str r12, [r0, #vp8_writer_lowvalue]
|
||||
str r3, [r0, #vp8_writer_range]
|
||||
str r2, [r0, #vp8_writer_count]
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
str r1, [r0, #vp8_writer_buffer]
|
||||
bx lr
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 int bit
|
||||
; r2 int probability
|
||||
|vp8_encode_bool| PROC
|
||||
push {r4-r10, lr}
|
||||
|
||||
mov r4, r2
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
cmp r1, #0
|
||||
mul r6, r4, r7 ; ((range-1) * probability)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
addne r2, r2, r4 ; if (bit) lowvalue += split
|
||||
subne r4, r5, r4 ; if (bit) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r1, [r7, r4]
|
||||
cmpge r1, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r1, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r1, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r1 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r10, pc}
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
|vp8_stop_encode| PROC
|
||||
push {r4-r10, lr}
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
mov r10, #32
|
||||
|
||||
stop_encode_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_se ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_se
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_se
|
||||
token_zero_while_loop_se
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_se
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r1, [r7, r4]
|
||||
cmpge r1, #0xff
|
||||
beq token_zero_while_loop_se
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_se
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r1, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r1, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r1 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_se
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r10, r10, #1
|
||||
bne stop_encode_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 int data
|
||||
; r2 int bits
|
||||
|vp8_encode_value| PROC
|
||||
push {r4-r12, lr}
|
||||
|
||||
mov r10, r2
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
rsb r4, r10, #32 ; 32-n
|
||||
|
||||
; v is kept in r1 during the token pack loop
|
||||
lsl r1, r1, r4 ; r1 = v << 32 - n
|
||||
|
||||
encode_value_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is refered to as "bb"
|
||||
lsls r1, r1, #1 ; bit = v >> n
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bit) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bit) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_ev ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_ev
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_ev
|
||||
token_zero_while_loop_ev
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_ev
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop_ev
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_ev
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_ev
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r10, r10, #1
|
||||
bne encode_value_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -1,317 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end]
|
||||
ldr r3, [r0, #vp8_writer_error]
|
||||
mov r1, $pos
|
||||
mov r0, $start
|
||||
bl vp8_validate_buffer_arm
|
||||
pop {r0-r3, r12, lr}
|
||||
MEND
|
||||
|
||||
|
||||
; r0 vp8_writer *w
|
||||
; r1 const TOKENEXTRA *p
|
||||
; r2 int xcount
|
||||
; r3 vp8_coef_encodings
|
||||
; s0 vp8_extra_bits
|
||||
; s1 vp8_coef_tree
|
||||
|vp8cx_pack_tokens_armv5| PROC
|
||||
push {r4-r12, lr}
|
||||
sub sp, sp, #16
|
||||
|
||||
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
|
||||
; sizeof (TOKENEXTRA) is 8
|
||||
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
|
||||
str r2, [sp, #0]
|
||||
str r3, [sp, #8] ; save vp8_coef_encodings
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
b check_p_lt_stop
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #8] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #60] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is refered to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #60] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #56] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
add sp, sp, #16
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -1,352 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; macro for validating write buffer position: calls vp8_validate_buffer_arm
|
||||
; needs vp8_writer in r0 (buffer_end and error fields are read through r0)
|
||||
; start shall not be in r1 (r1 is overwritten with $pos before $start is read)
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end] ; r2 = w->buffer_end
|
||||
ldr r3, [r0, #vp8_writer_error] ; r3 = word stored at the writer's error field
|
||||
mov r1, $pos ; r1 = position to validate
|
||||
mov r0, $start ; r0 = buffer start (the writer pointer in r0 is replaced here)
|
||||
bl vp8_validate_buffer_arm ; called with r0=$start, r1=$pos, r2, r3 as loaded above
|
||||
pop {r0-r3, r12, lr} ; restore the writer pointer and the caller's working registers
|
||||
MEND
|
||||
|
||||
; r0 VP8_COMP *cpi
|
||||
; r1 vp8_writer *w
|
||||
; r2 vp8_coef_encodings
|
||||
; r3 vp8_extra_bits
|
||||
; s0 vp8_coef_tree (first stack argument; read at [sp, #64] after the prologue)
|
||||
|
||||
|vp8cx_pack_mb_row_tokens_armv5| PROC
|
||||
push {r4-r12, lr} ; save r4-r12 and lr (10 words = 40 bytes)
|
||||
sub sp, sp, #24 ; locals: [sp,#0] stop, #4 b->tree, #8 vp8_extra_bits, #12 mb_rows, #16 tokenlist, #20 vp8_coef_encodings
|
||||
|
||||
; Compute address of cpi->common.mb_rows
|
||||
ldr r4, _VP8_COMP_common_
|
||||
ldr r6, _VP8_COMMON_MBrows_
|
||||
add r4, r0, r4
|
||||
|
||||
ldr r5, [r4, r6] ; load up mb_rows
|
||||
|
||||
str r2, [sp, #20] ; save vp8_coef_encodings
|
||||
str r5, [sp, #12] ; save mb_rows
|
||||
str r3, [sp, #8] ; save vp8_extra_bits
|
||||
|
||||
ldr r4, _VP8_COMP_tplist_
|
||||
add r4, r0, r4
|
||||
ldr r7, [r4, #0] ; dereference cpi->tp_list
|
||||
|
||||
mov r0, r1 ; keep same as other loops
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue] ; r2 = lowvalue for the whole routine
|
||||
ldr r5, [r0, #vp8_writer_range] ; r5 = range for the whole routine
|
||||
ldr r3, [r0, #vp8_writer_count] ; r3 = count for the whole routine
|
||||
|
||||
mb_row_loop
|
||||
|
||||
ldr r1, [r7, #tokenlist_start]
|
||||
ldr r9, [r7, #tokenlist_stop]
|
||||
str r9, [sp, #0] ; save stop for later comparison
|
||||
str r7, [sp, #16] ; tokenlist address for next time
|
||||
|
||||
b check_p_lt_stop
|
||||
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #20] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #64] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4] ; NOTE(review): unconditional load; if x < 0 the ldrge above is skipped and r7 is stale - confirm x cannot go negative
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #64] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #8] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here sizeof (vp8_extra_bit_struct) == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra ; NOTE(review): hard-coded offset 4 and a word load here, versus ldrsh at tokenextra_extra above - confirm they agree
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7 ; count = -8
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
ldr r6, [sp, #12] ; mb_rows
|
||||
ldr r7, [sp, #16] ; tokenlist address
|
||||
subs r6, r6, #1 ; --mb_rows; the Z flag from here drives the bne below
|
||||
add r7, r7, #TOKENLIST_SZ ; next element in the array
|
||||
str r6, [sp, #12]
|
||||
bne mb_row_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue] ; write the cached encoder state back to the writer
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
add sp, sp, #24 ; release the 24 bytes of locals
|
||||
pop {r4-r12, pc} ; restore registers and return
|
||||
ENDP
|
||||
|
||||
_VP8_COMP_common_
|
||||
DCD vp8_comp_common ; literal: added to cpi to reach cpi->common
|
||||
_VP8_COMMON_MBrows_
|
||||
DCD vp8_common_mb_rows ; literal: offset of mb_rows within cpi->common
|
||||
_VP8_COMP_tplist_
|
||||
DCD vp8_comp_tplist ; literal: added to cpi to reach the tp_list pointer
|
||||
|
||||
END
|
||||
@@ -1,471 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
; macro for validating write buffer position: calls vp8_validate_buffer_arm
|
||||
; needs vp8_writer in r0 (buffer_end and error fields are read through r0)
|
||||
; start shall not be in r1 (r1 is overwritten with $pos before $start is read)
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end] ; r2 = w->buffer_end
|
||||
ldr r3, [r0, #vp8_writer_error] ; r3 = word stored at the writer's error field
|
||||
mov r1, $pos ; r1 = position to validate
|
||||
mov r0, $start ; r0 = buffer start (the writer pointer in r0 is replaced here)
|
||||
bl vp8_validate_buffer_arm ; called with r0=$start, r1=$pos, r2, r3 as loaded above
|
||||
pop {r0-r3, r12, lr} ; restore the writer pointer and the caller's working registers
|
||||
MEND
|
||||
|
||||
; r0 VP8_COMP *cpi
|
||||
; r1 unsigned char *cx_data
|
||||
; r2 const unsigned char *cx_data_end
|
||||
; r3 int num_part
|
||||
; s0 vp8_coef_encodings (read at [sp, #80] after the prologue)
|
||||
; s1 vp8_extra_bits, (read at [sp, #84] after the prologue)
|
||||
; s2 const vp8_tree_index * (read at [sp, #88] after the prologue)
|
||||
|
||||
|vp8cx_pack_tokens_into_partitions_armv5| PROC
|
||||
push {r4-r12, lr} ; save r4-r12 and lr (10 words = 40 bytes)
|
||||
sub sp, sp, #40 ; locals: [sp,#0] stop, #4 b->tree, #8 cx_data_end, #12 rows left, #16 tokenlist, #20 num_part, #24 ptr, #28 i, #32 tp_list, #36 mb_rows
|
||||
|
||||
; Compute address of cpi->common.mb_rows
|
||||
ldr r4, _VP8_COMP_common_
|
||||
ldr r6, _VP8_COMMON_MBrows_
|
||||
add r4, r0, r4
|
||||
|
||||
ldr r5, [r4, r6] ; load up mb_rows
|
||||
|
||||
str r5, [sp, #36] ; save mb_rows
|
||||
str r1, [sp, #24] ; save ptr = cx_data
|
||||
str r3, [sp, #20] ; save num_part
|
||||
str r2, [sp, #8] ; save cx_data_end
|
||||
|
||||
ldr r4, _VP8_COMP_tplist_
|
||||
add r4, r0, r4
|
||||
ldr r7, [r4, #0] ; dereference cpi->tp_list
|
||||
str r7, [sp, #32] ; store start of cpi->tp_list
|
||||
|
||||
ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
|
||||
add r0, r0, r11
|
||||
|
||||
mov r11, #0
|
||||
str r11, [sp, #28] ; i
|
||||
|
||||
numparts_loop
|
||||
ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
|
||||
add r0, r2 ; bc[i + 1]
|
||||
|
||||
ldr r10, [sp, #24] ; ptr
|
||||
ldr r5, [sp, #36] ; move mb_rows to the counting section
|
||||
subs r5, r5, r11 ; move start point with each partition
|
||||
; mb_rows starts at i
|
||||
str r5, [sp, #12]
|
||||
|
||||
; Reset all of the VP8 Writer data for each partition that
|
||||
; is processed.
|
||||
; start_encode
|
||||
|
||||
ldr r3, [sp, #8]
|
||||
str r3, [r0, #vp8_writer_buffer_end]
|
||||
|
||||
mov r2, #0 ; vp8_writer_lowvalue
|
||||
mov r5, #255 ; vp8_writer_range
|
||||
mvn r3, #23 ; vp8_writer_count = -24
|
||||
|
||||
str r2, [r0, #vp8_writer_pos]
|
||||
str r10, [r0, #vp8_writer_buffer]
|
||||
|
||||
ble end_partition ; if (mb_rows <= 0) end partition
|
||||
|
||||
mb_row_loop
|
||||
|
||||
ldr r1, [r7, #tokenlist_start]
|
||||
ldr r9, [r7, #tokenlist_stop]
|
||||
str r9, [sp, #0] ; save stop for later comparison
|
||||
str r7, [sp, #16] ; tokenlist address for next time
|
||||
|
||||
b check_p_lt_stop
|
||||
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #80] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #88] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4] ; NOTE(review): unconditional load; if x < 0 the ldrge above is skipped and r7 is stale - confirm x cannot go negative
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #88] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #84] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here sizeof (vp8_extra_bit_struct) == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra ; NOTE(review): hard-coded offset 4 and a word load here, versus ldrsh at tokenextra_extra above - confirm they agree
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7 ; count = -8
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
ldr r10, [sp, #20] ; num_parts
|
||||
mov r1, #TOKENLIST_SZ
|
||||
mul r1, r10, r1 ; r1 = num_parts * TOKENLIST_SZ (stride to this partition's next row)
|
||||
|
||||
ldr r6, [sp, #12] ; mb_rows
|
||||
ldr r7, [sp, #16] ; tokenlist address
|
||||
subs r6, r6, r10 ; rows left -= num_parts; flags from here drive the bgt below
|
||||
add r7, r7, r1 ; next element in the array
|
||||
str r6, [sp, #12]
|
||||
bgt mb_row_loop
|
||||
|
||||
end_partition
|
||||
mov r12, #32 ; loop counter: stop_encode_loop runs 32 times
|
||||
|
||||
stop_encode_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_se ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_se
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_se
|
||||
token_zero_while_loop_se
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_se
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop_se
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_se
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_se
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r12, r12, #1
|
||||
bne stop_encode_loop
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
ldr r12, [sp, #24] ; ptr
|
||||
add r12, r12, r4 ; ptr += w->pos
|
||||
str r12, [sp, #24]
|
||||
|
||||
ldr r11, [sp, #28] ; i
|
||||
ldr r10, [sp, #20] ; num_parts
|
||||
|
||||
add r11, r11, #1 ; i++
|
||||
str r11, [sp, #28]
|
||||
|
||||
ldr r7, [sp, #32] ; cpi->tp_list[i]
|
||||
mov r1, #TOKENLIST_SZ
|
||||
add r7, r7, r1 ; next element in cpi->tp_list
|
||||
str r7, [sp, #32] ; cpi->tp_list[i+1]
|
||||
|
||||
cmp r10, r11
|
||||
bgt numparts_loop
|
||||
|
||||
add sp, sp, #40 ; release the 40 bytes of locals
|
||||
pop {r4-r12, pc} ; restore registers and return
|
||||
ENDP
|
||||
|
||||
_VP8_COMP_common_
|
||||
DCD vp8_comp_common ; literal: added to cpi to reach cpi->common
|
||||
_VP8_COMMON_MBrows_
|
||||
DCD vp8_common_mb_rows ; literal: offset of mb_rows within cpi->common
|
||||
_VP8_COMP_tplist_
|
||||
DCD vp8_comp_tplist ; literal: added to cpi to reach the tp_list pointer
|
||||
_VP8_COMP_bc_
|
||||
DCD vp8_comp_bc ; literal: added to cpi to reach the first vp8_writer
|
||||
_vp8_writer_sz_
|
||||
DCD vp8_writer_sz ; literal: sizeof(vp8_writer), the step between writers
|
||||
|
||||
END
|
||||
@@ -1,225 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_fast_quantize_b_armv6|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 BLOCK *b
|
||||
; r1 BLOCKD *d
|
||||
|vp8_fast_quantize_b_armv6| PROC
|
||||
stmfd sp!, {r1, r4-r11, lr} ; r1 (BLOCKD *d) is saved too so PART 2 can reload it from [sp, #0]
|
||||
|
||||
ldr r3, [r0, #vp8_block_coeff] ; coeff
|
||||
ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
|
||||
ldr r5, [r0, #vp8_block_round] ; round
|
||||
ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
|
||||
ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
|
||||
ldr r8, [r1, #vp8_blockd_dequant] ; dequant
|
||||
|
||||
ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
|
||||
; is used to update the counter so that
|
||||
; it can be used to mark nonzero
|
||||
; quantized coefficient pairs.
|
||||
|
||||
mov r1, #0 ; flags for quantized coeffs
|
||||
|
||||
; PART 1: quantization and dequantization loop
|
||||
loop
|
||||
ldr r9, [r3], #4 ; [z1 | z0]
|
||||
ldr r10, [r5], #4 ; [r1 | r0]
|
||||
ldr r11, [r4], #4 ; [q1 | q0]
|
||||
|
||||
ssat16 lr, #1, r9 ; [sz1 | sz0]
|
||||
eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
|
||||
ssub16 r9, r9, lr ; x = (z ^ sz) - sz
|
||||
sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
|
||||
|
||||
ldr r12, [r3], #4 ; [z3 | z2]
|
||||
|
||||
smulbb r0, r9, r11 ; [(x0+r0)*q0]
|
||||
smultt r9, r9, r11 ; [(x1+r1)*q1]
|
||||
|
||||
ldr r10, [r5], #4 ; [r3 | r2]
|
||||
|
||||
ssat16 r11, #1, r12 ; [sz3 | sz2]
|
||||
eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
|
||||
pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
|
||||
ldr r9, [r4], #4 ; [q3 | q2]
|
||||
ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
|
||||
|
||||
sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
|
||||
|
||||
eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
|
||||
|
||||
smulbb r10, r12, r9 ; [(x2+r2)*q2]
|
||||
smultt r12, r12, r9 ; [(x3+r3)*q3]
|
||||
|
||||
ssub16 r0, r0, lr ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r0, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
|
||||
|
||||
str r0, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r9, [r8], #4 ; [dq1 | dq0]
|
||||
|
||||
pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
|
||||
eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
|
||||
ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r10, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
|
||||
|
||||
str r10, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r11, [r8], #4 ; [dq3 | dq2]
|
||||
|
||||
smulbb r12, r0, r9 ; [x0*dq0]
|
||||
smultt r0, r0, r9 ; [x1*dq1]
|
||||
|
||||
smulbb r9, r10, r11 ; [x2*dq2]
|
||||
smultt r10, r10, r11 ; [x3*dq3]
|
||||
|
||||
lsls r2, r2, #2 ; update loop counter (0x1000000 shifts out to 0 on the 4th pass); Z is consumed by the bne below
|
||||
strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
|
||||
strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
|
||||
strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
|
||||
strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
|
||||
add r7, r7, #8 ; dqcoeff += 8 bytes (the four halfwords just stored)
|
||||
bne loop
|
||||
|
||||
; PART 2: check position for eob...
|
||||
ldr r11, [sp, #0] ; restore BLOCKD pointer
|
||||
mov lr, #0 ; init eob
|
||||
cmp r1, #0 ; coeffs after quantization?
|
||||
ldr r12, [r11, #vp8_blockd_eob]
|
||||
beq end ; skip eob calculations if all zero
|
||||
|
||||
ldr r0, [r11, #vp8_blockd_qcoeff]
|
||||
|
||||
; check shortcut for nonzero qcoeffs
|
||||
tst r1, #0x80
|
||||
bne quant_coeff_15_14
|
||||
tst r1, #0x20
|
||||
bne quant_coeff_13_11
|
||||
tst r1, #0x8
|
||||
bne quant_coeff_12_7
|
||||
tst r1, #0x40
|
||||
bne quant_coeff_10_9
|
||||
tst r1, #0x10
|
||||
bne quant_coeff_8_3
|
||||
tst r1, #0x2
|
||||
bne quant_coeff_6_5
|
||||
tst r1, #0x4
|
||||
bne quant_coeff_4_2
|
||||
b quant_coeff_1_0
|
||||
|
||||
quant_coeff_15_14
|
||||
ldrh r2, [r0, #30] ; rc=15, i=15
|
||||
mov lr, #16
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #28] ; rc=14, i=14
|
||||
mov lr, #15
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_13_11
|
||||
ldrh r2, [r0, #22] ; rc=11, i=13
|
||||
mov lr, #14
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_12_7
|
||||
ldrh r3, [r0, #14] ; rc=7, i=12
|
||||
mov lr, #13
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #20] ; rc=10, i=11
|
||||
mov lr, #12
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_10_9
|
||||
ldrh r3, [r0, #26] ; rc=13, i=10
|
||||
mov lr, #11
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #24] ; rc=12, i=9
|
||||
mov lr, #10
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_8_3
|
||||
ldrh r3, [r0, #18] ; rc=9, i=8
|
||||
mov lr, #9
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #12] ; rc=6, i=7
|
||||
mov lr, #8
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_6_5
|
||||
ldrh r3, [r0, #6] ; rc=3, i=6
|
||||
mov lr, #7
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #4] ; rc=2, i=5
|
||||
mov lr, #6
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_4_2
|
||||
ldrh r3, [r0, #10] ; rc=5, i=4
|
||||
mov lr, #5
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #16] ; rc=8, i=3
|
||||
mov lr, #4
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #8] ; rc=4, i=2
|
||||
mov lr, #3
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_1_0
|
||||
ldrh r2, [r0, #2] ; rc=1, i=1
|
||||
mov lr, #2
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
mov lr, #1 ; rc=0, i=0
|
||||
|
||||
end
|
||||
strb lr, [r12] ; store eob as a byte through the pointer loaded from vp8_blockd_eob
|
||||
ldmfd sp!, {r1, r4-r11, pc} ; restore saved registers and return
|
||||
|
||||
ENDP
|
||||
|
||||
loop_count
|
||||
DCD 0x1000000 ; initial value of the shifting loop counter loaded into r2
|
||||
|
||||
END
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_mse16x16_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0    unsigned char *src_ptr
; r1    int  source_stride
; r2    unsigned char *ref_ptr
; r3    int  recon_stride
; stack unsigned int *sse
;
; Accumulates the sum of squared byte differences over 16 rows of 16 pixels,
; stores the total through *sse and also returns it in r0.
;
;note: Based on vp8_variance16x16_armv6. The sum of differences is never used
;      by this function, so the usad8 partial sums of the variance routine
;      are not computed here; only the squared error is accumulated.
;
; For every group of 4 pixels:
;   usub8 + sel (in both operand orders) followed by orr leaves the absolute
;   difference in each byte, uxtb16 widens bytes 0/2 and (with ror #8)
;   bytes 1/3 to halfwords, and smlad adds both squared halfwords to r4.

|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}

    pld     [r0, r1, lsl #0]
    pld     [r2, r3, lsl #0]

    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r4, #0              ; initialize sse = 0
    mov     lr, #0              ; constant zero, only read inside the loop

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    usub8   r8, r5, r6          ; calculate difference
    pld     [r0, r1, lsl #1]
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    pld     [r2, r3, lsl #1]
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    ldr     r5, [r0, #0x4]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels
    ldr     r5, [r0, #0x8]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    ldr     r5, [r0, #0xc]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    subs    r12, r12, #1        ; next row

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]       ; get address of sse (above the 7 saved words)
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse

    pop     {r4-r9, pc}

    ENDP
|
||||
|
||||
END
|
||||
@@ -1,272 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_subtract_mby_armv6|
|
||||
EXPORT |vp8_subtract_mbuv_armv6|
|
||||
EXPORT |vp8_subtract_b_armv6|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0    BLOCK *be
; r1    BLOCKD *bd
; r2    int pitch
;
; Writes a 4x4 block of 16-bit differences (source byte - predictor byte)
; to be->src_diff. The source row is *be->base_src + be->src and advances by
; be->src_stride; the predictor (bd->predictor) advances by pitch bytes and
; the diff pointer by pitch halfwords per row.
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]

    ldr     r3, [r4]
    ldr     r7, [r0, #vp8_block_src_stride]
    add     r3, r3, r5          ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor]

    mov     r9, #4              ; loop count

loop_block

    ldr     r0, [r3], r7        ; src
    ldr     r1, [r8], r2        ; pred

    uxtb16  r4, r0              ; [s2 | s0]
    uxtb16  r5, r1              ; [p2 | p0]
    uxtb16  r0, r0, ror #8      ; [s3 | s1]
    uxtb16  r1, r1, ror #8      ; [p3 | p1]

    usub16  r4, r4, r5          ; [d2 | d0]
    usub16  r5, r0, r1          ; [d3 | d1]

    subs    r9, r9, #1          ; decrement loop counter

    pkhbt   r0, r4, r5, lsl #16 ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16 ; [d3 | d2]

    str     r0, [r6, #0]        ; diff
    str     r1, [r6, #4]        ; diff

    add     r6, r6, r2, lsl #1  ; update diff pointer
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                  ; same return form as the other routines in
                                ; this file; safe for ARM/Thumb interworking

    ENDP
|
||||
|
||||
|
||||
; r0        short *diff
; r1        unsigned char *usrc
; r2        unsigned char *vsrc
; r3        int src_stride
; sp + 0    unsigned char *upred
; sp + 4    unsigned char *vpred
; sp + 8    int pred_stride
;
; Stores 8 rows of 8 halfword differences (src - pred) for the U plane and
; then 8 rows of 8 for the V plane, contiguously, starting 512 bytes past
; diff.
|vp8_subtract_mbuv_armv6| PROC

    push    {r4-r11}            ; 8 words: stack arguments are now at sp + 32

    add     r0, r0, #512        ; skip the first 512 bytes of diff
    mov     r4, #8              ; rows remaining in U
    ldr     r5, [sp, #32]       ; r5  = upred
    ldr     r12, [sp, #40]      ; r12 = pred_stride

    ; ---- U plane: two groups (A, B) of 4 pixels per row ----
subtract_u_row
    ldr     r6, [r1]            ; A: 4 source bytes
    ldr     r7, [r5]            ; A: 4 predictor bytes

    uxtb16  r8, r6              ; A: [s2 | s0]
    uxtb16  r9, r7              ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8     ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8     ; A: [p3 | p1]

    usub16  r6, r8, r9          ; A: [d2 | d0]
    usub16  r7, r10, r11        ; A: [d3 | d1]

    ldr     r10, [r1, #4]       ; B: 4 source bytes
    ldr     r11, [r5, #4]       ; B: 4 predictor bytes

    pkhbt   r8, r6, r7, lsl #16 ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; A: [d3 | d2]

    str     r8, [r0], #4        ; A: write d0, d1
    uxtb16  r8, r10             ; B: [s2 | s0]
    str     r9, [r0], #4        ; A: write d2, d3

    uxtb16  r9, r11             ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8    ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8    ; B: [p3 | p1]

    usub16  r6, r8, r9          ; B: [d2 | d0]
    usub16  r7, r10, r11        ; B: [d3 | d1]

    add     r1, r1, r3          ; usrc  += src_stride
    add     r5, r5, r12         ; upred += pred_stride

    pkhbt   r8, r6, r7, lsl #16 ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; B: [d3 | d2]

    str     r8, [r0], #4        ; B: write d0, d1
    subs    r4, r4, #1          ; one U row done
    str     r9, [r0], #4        ; B: write d2, d3

    bne     subtract_u_row

    ldr     r5, [sp, #36]       ; r5 = vpred
    mov     r4, #8              ; rows remaining in V

    ; ---- V plane: same sequence, reading through r2 instead of r1 ----
subtract_v_row
    ldr     r6, [r2]            ; A: 4 source bytes
    ldr     r7, [r5]            ; A: 4 predictor bytes

    uxtb16  r8, r6              ; A: [s2 | s0]
    uxtb16  r9, r7              ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8     ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8     ; A: [p3 | p1]

    usub16  r6, r8, r9          ; A: [d2 | d0]
    usub16  r7, r10, r11        ; A: [d3 | d1]

    ldr     r10, [r2, #4]       ; B: 4 source bytes
    ldr     r11, [r5, #4]       ; B: 4 predictor bytes

    pkhbt   r8, r6, r7, lsl #16 ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; A: [d3 | d2]

    str     r8, [r0], #4        ; A: write d0, d1
    uxtb16  r8, r10             ; B: [s2 | s0]
    str     r9, [r0], #4        ; A: write d2, d3

    uxtb16  r9, r11             ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8    ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8    ; B: [p3 | p1]

    usub16  r6, r8, r9          ; B: [d2 | d0]
    usub16  r7, r10, r11        ; B: [d3 | d1]

    add     r2, r2, r3          ; vsrc  += src_stride
    add     r5, r5, r12         ; vpred += pred_stride

    pkhbt   r8, r6, r7, lsl #16 ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; B: [d3 | d2]

    str     r8, [r0], #4        ; B: write d0, d1
    subs    r4, r4, #1          ; one V row done
    str     r9, [r0], #4        ; B: write d2, d3

    bne     subtract_v_row

    pop     {r4-r11}
    bx      lr

    ENDP
|
||||
|
||||
|
||||
; r0        short *diff
; r1        unsigned char *src
; r2        int src_stride
; r3        unsigned char *pred
; sp + 0    int pred_stride
;
; Stores 16 rows of 16 halfword differences (src - pred) contiguously at
; diff. Each row is handled as four groups (A..D) of 4 pixels.
|vp8_subtract_mby_armv6| PROC

    push    {r4-r11}            ; 8 words: stack argument is now at sp + 32
    ldr     r12, [sp, #32]      ; r12 = pred_stride
    mov     r4, #16             ; rows remaining
subtract_y_row
    ldr     r6, [r1]            ; A: 4 source bytes
    ldr     r7, [r3]            ; A: 4 predictor bytes

    uxtb16  r8, r6              ; A: [s2 | s0]
    uxtb16  r9, r7              ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8     ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8     ; A: [p3 | p1]

    usub16  r6, r8, r9          ; A: [d2 | d0]
    usub16  r7, r10, r11        ; A: [d3 | d1]

    ldr     r10, [r1, #4]       ; B: 4 source bytes
    ldr     r11, [r3, #4]       ; B: 4 predictor bytes

    pkhbt   r8, r6, r7, lsl #16 ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; A: [d3 | d2]

    str     r8, [r0], #4        ; A: write d0, d1
    uxtb16  r8, r10             ; B: [s2 | s0]
    str     r9, [r0], #4        ; A: write d2, d3

    uxtb16  r9, r11             ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8    ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8    ; B: [p3 | p1]

    usub16  r6, r8, r9          ; B: [d2 | d0]
    usub16  r7, r10, r11        ; B: [d3 | d1]

    ldr     r10, [r1, #8]       ; C: 4 source bytes
    ldr     r11, [r3, #8]       ; C: 4 predictor bytes

    pkhbt   r8, r6, r7, lsl #16 ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; B: [d3 | d2]

    str     r8, [r0], #4        ; B: write d0, d1
    uxtb16  r8, r10             ; C: [s2 | s0]
    str     r9, [r0], #4        ; B: write d2, d3

    uxtb16  r9, r11             ; C: [p2 | p0]
    uxtb16  r10, r10, ror #8    ; C: [s3 | s1]
    uxtb16  r11, r11, ror #8    ; C: [p3 | p1]

    usub16  r6, r8, r9          ; C: [d2 | d0]
    usub16  r7, r10, r11        ; C: [d3 | d1]

    ldr     r10, [r1, #12]      ; D: 4 source bytes
    ldr     r11, [r3, #12]      ; D: 4 predictor bytes

    pkhbt   r8, r6, r7, lsl #16 ; C: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; C: [d3 | d2]

    str     r8, [r0], #4        ; C: write d0, d1
    uxtb16  r8, r10             ; D: [s2 | s0]
    str     r9, [r0], #4        ; C: write d2, d3

    uxtb16  r9, r11             ; D: [p2 | p0]
    uxtb16  r10, r10, ror #8    ; D: [s3 | s1]
    uxtb16  r11, r11, ror #8    ; D: [p3 | p1]

    usub16  r6, r8, r9          ; D: [d2 | d0]
    usub16  r7, r10, r11        ; D: [d3 | d1]

    add     r1, r1, r2          ; src  += src_stride
    add     r3, r3, r12         ; pred += pred_stride

    pkhbt   r8, r6, r7, lsl #16 ; D: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16 ; D: [d3 | d2]

    str     r8, [r0], #4        ; D: write d0, d1
    subs    r4, r4, #1          ; one row done
    str     r9, [r0], #4        ; D: write d2, d3

    bne     subtract_y_row

    pop     {r4-r11}
    bx      lr

    ENDP
|
||||
|
||||
END
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
|
||||
/* 256-entry lookup table; the entries run from 2047 at index 0 down to 1 at
 * index 255.
 * NOTE(review): presumably the cost, in 1/256-bit units, of coding a symbol
 * whose probability is index/256 -- confirm against the users of this table. */
const unsigned int vp8_prob_cost[256] =
|
||||
{
|
||||
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
|
||||
1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
|
||||
767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
|
||||
617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
|
||||
511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
|
||||
428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
|
||||
361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
|
||||
304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
|
||||
255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
|
||||
211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
|
||||
172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
|
||||
137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
|
||||
105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
|
||||
75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
|
||||
48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
|
||||
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
|
||||
};
|
||||
|
||||
/* Externally linkable wrapper: passes all four arguments unchanged to
 * validate_buffer() and hands its result back to the caller. */
int vp8_validate_buffer_arm(const unsigned char *start,
                            size_t               len,
                            const unsigned char *end,
                            struct vpx_internal_error_info *error)
{
    const int status = validate_buffer(start, len, end, error);

    return status;
}
|
||||
@@ -1,258 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_fast_quantize_b_neon|
|
||||
EXPORT |vp8_fast_quantize_b_pair_neon|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=4
|
||||
|
||||
;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
;
; r0 = b1, r1 = b2, r2 = d1, r3 = d2
;
; Quantizes the 16 coefficients of b1 and of b2 in one interleaved pass and
; writes qcoeff, dqcoeff and the one-byte eob of d1 and d2.
; The round and quant_fast tables are read from b1 and the dequant table
; from d1; the same three tables are applied to both blocks.
|vp8_fast_quantize_b_pair_neon| PROC

    push        {r4-r9}
    vstmdb      sp!, {q4-q7}            ; q4-q7 are used as scratch below

    ldr         r4, [r0, #vp8_block_coeff]
    ldr         r5, [r0, #vp8_block_quant_fast]
    ldr         r6, [r0, #vp8_block_round]

    vld1.16     {q0, q1}, [r4@128]      ; z = 16 coefficients of b1

    ldr         r7, [r2, #vp8_blockd_qcoeff]

    vabs.s16    q4, q0                  ; x = |z|
    vabs.s16    q5, q1

    ; arithmetic shift by 15: 0 for non-negative lanes, -1 for negative ones
    vshr.s16    q2, q0, #15             ; sz
    vshr.s16    q3, q1, #15

    vld1.s16    {q6, q7}, [r6@128]      ; round[0..15]
    vld1.s16    {q8, q9}, [r5@128]      ; quant_fast[0..15]

    ldr         r4, [r1, #vp8_block_coeff]

    vadd.s16    q4, q6                  ; x += round
    vadd.s16    q5, q7

    vld1.16     {q0, q1}, [r4@128]      ; z2 = 16 coefficients of b2

    vqdmulh.s16 q4, q8                  ; y = (2 * x * quant) >> 16
    vqdmulh.s16 q5, q9

    vabs.s16    q10, q0                 ; x2 = |z2|
    vabs.s16    q11, q1
    vshr.s16    q12, q0, #15            ; sz2
    vshr.s16    q13, q1, #15

    ; put the sign back (first half: y ^ sz)
    veor.s16    q4, q2
    veor.s16    q5, q3

    vadd.s16    q10, q6                 ; x2 += round
    vadd.s16    q11, q7

    ldr         r8, [r2, #vp8_blockd_dequant]

    vqdmulh.s16 q10, q8                 ; y2 = (2 * x2 * quant) >> 16
    vqdmulh.s16 q11, q9

    vshr.s16    q4, #1                  ; undo the doubling of vqdmulh
    vshr.s16    q5, #1

    vld1.s16    {q6, q7}, [r8@128]      ; dequant[0..15]

    vsub.s16    q4, q2                  ; x1 = (y ^ sz) - sz
    vsub.s16    q5, q3

    vshr.s16    q10, #1                 ; undo the doubling of vqdmulh
    vshr.s16    q11, #1

    ldr         r9, [r2, #vp8_blockd_dqcoeff]

    veor.s16    q10, q12                ; y2 ^ sz2
    veor.s16    q11, q13

    vst1.s16    {q4, q5}, [r7]          ; d1 qcoeff = x1


    vsub.s16    q10, q12                ; x2 = (y2 ^ sz2) - sz2
    vsub.s16    q11, q13

    ldr         r6, [r3, #vp8_blockd_qcoeff]

    vmul.s16    q2, q6, q4              ; x1 * dequant
    vmul.s16    q3, q7, q5

    adr         r0, inv_zig_zag         ; address of the inverse zigzag table

    vceq.s16    q8, q8                  ; q8 = all bits set

    vst1.s16    {q10, q11}, [r6]        ; d2 qcoeff = x2

    vmul.s16    q12, q6, q10            ; x2 * dequant
    vmul.s16    q13, q7, q11

    vld1.16     {q6, q7}, [r0@128]      ; inverse scan order

    vtst.16     q14, q4, q8             ; eob search: lanes of x1 that are
    vtst.16     q15, q5, q8             ; non-zero become all ones

    vst1.s16    {q2, q3}, [r9]          ; d1 dqcoeff = x1 * dequant

    ldr         r7, [r3, #vp8_blockd_dqcoeff]

    vand        q0, q6, q14             ; keep scan positions of non-zero lanes
    vand        q1, q7, q15

    vst1.s16    {q12, q13}, [r7]        ; d2 dqcoeff = x2 * dequant

    vtst.16     q2, q10, q8             ; same eob search for x2
    vtst.16     q3, q11, q8

    vmax.u16    q0, q0, q1              ; reduce 16 candidates to 8 (block 1)

    vand        q10, q6, q2             ; keep scan positions of non-zero lanes
    vand        q11, q7, q3
    vmax.u16    q10, q10, q11           ; reduce 16 candidates to 8 (block 2)

    vmax.u16    d0, d0, d1              ; 8 -> 4
    vmax.u16    d20, d20, d21
    vmovl.u16   q0, d0                  ; widen to 32 bits
    vmovl.u16   q10, d20

    vmax.u32    d0, d0, d1              ; 4 -> 2
    vmax.u32    d20, d20, d21
    vpmax.u32   d0, d0, d0              ; 2 -> 1
    vpmax.u32   d20, d20, d20

    ldr         r4, [r2, #vp8_blockd_eob]
    ldr         r5, [r3, #vp8_blockd_eob]

    vst1.8      {d0[0]}, [r4]           ; d1 eob (one byte)
    vst1.8      {d20[0]}, [r5]          ; d2 eob (one byte)

    vldmia      sp!, {q4-q7}
    pop         {r4-r9}
    bx          lr

    ENDP
|
||||
|
||||
;void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
;
; r0 = b, r1 = d
;
; Quantizes the 16 coefficients of b and writes qcoeff, dqcoeff and the
; one-byte eob of d. When every input coefficient is zero the arithmetic
; result is discarded and zeros are stored directly.
|vp8_fast_quantize_b_neon| PROC

    push        {r4-r7}

    ldr         r3, [r0, #vp8_block_coeff]
    ldr         r4, [r0, #vp8_block_quant_fast]
    ldr         r5, [r0, #vp8_block_round]

    vld1.16     {q0, q1}, [r3@128]      ; z = 16 coefficients
    vorr.s16    q14, q0, q1             ; all-zero test: fold 16 lanes to 8
    ldr         r6, [r1, #vp8_blockd_qcoeff]
    ldr         r7, [r1, #vp8_blockd_dqcoeff]
    vorr.s16    d28, d28, d29           ; all-zero test: fold 8 lanes to 4

    vabs.s16    q12, q0                 ; x = |z|
    vabs.s16    q13, q1

    ; arithmetic shift by 15: 0 for non-negative lanes, -1 for negative ones
    vshr.s16    q2, q0, #15             ; sz
    vmov        r2, r3, d28             ; all-zero test: move 64 bits to r2:r3
    vshr.s16    q3, q1, #15

    vld1.s16    {q14, q15}, [r5@128]    ; round[0..15]
    vld1.s16    {q8, q9}, [r4@128]      ; quant_fast[0..15]

    vadd.s16    q12, q14                ; x += round
    vadd.s16    q13, q15

    adr         r0, inv_zig_zag         ; address of the inverse zigzag table

    vqdmulh.s16 q12, q8                 ; y = (2 * x * quant) >> 16
    vqdmulh.s16 q13, q9

    vld1.16     {q10, q11}, [r0@128]    ; inverse scan order

    vceq.s16    q8, q8                  ; q8 = all bits set

    ldr         r4, [r1, #vp8_blockd_dequant]

    vshr.s16    q12, #1                 ; undo the doubling of vqdmulh
    vshr.s16    q13, #1

    ldr         r5, [r1, #vp8_blockd_eob]

    orr         r2, r2, r3              ; all-zero test: combine both words
    cmp         r2, #0
    beq         store_all_zero          ; every coefficient was zero

    ; put the sign back
    veor.s16    q12, q2                 ; y ^ sz
    veor.s16    q13, q3

    vsub.s16    q12, q2                 ; x1 = (y ^ sz) - sz
    vsub.s16    q13, q3

    vld1.s16    {q2, q3}, [r4@128]      ; dequant[0..15]

    vtst.16     q14, q12, q8            ; eob search: non-zero lanes of x1
    vtst.16     q15, q13, q8            ; become all ones

    vst1.s16    {q12, q13}, [r6@128]    ; qcoeff = x1

    vand        q10, q10, q14           ; keep scan positions of non-zero lanes
    vand        q11, q11, q15


    vmax.u16    q0, q10, q11            ; reduce 16 candidates to 8
    vmax.u16    d0, d0, d1              ; 8 -> 4
    vmovl.u16   q0, d0                  ; widen to 32 bits

    vmul.s16    q2, q12                 ; x1 * dequant
    vmul.s16    q3, q13

    vmax.u32    d0, d0, d1              ; 4 -> 2
    vpmax.u32   d0, d0, d0              ; 2 -> 1

    vst1.s16    {q2, q3}, [r7@128]      ; dqcoeff = x1 * dequant

    vst1.8      {d0[0]}, [r5]           ; eob (one byte)

    pop         {r4-r7}
    bx          lr

store_all_zero
    strb        r2, [r5]                ; eob = 0 (r2 is zero on this path)
    vst1.s16    {q0, q1}, [r6@128]      ; qcoeff = 0 (q0, q1 hold the zero input)
    vst1.s16    {q0, q1}, [r7@128]      ; dqcoeff = 0

    pop         {r4-r7}
    bx          lr

    ENDP
|
||||
|
||||
; default inverse zigzag table is defined in vp8/common/entropy.c
; The 16 halfword entries below hold the values 1..16 exactly once each, so
; the maximum entry taken over the non-zero coefficients (as the quantizers
; in this file do with vand/vmax) is directly the end-of-block count.
|
||||
ALIGN 16 ; enable use of @128 bit aligned loads
|
||||
inv_zig_zag
|
||||
DCW 0x0001, 0x0002, 0x0006, 0x0007
|
||||
DCW 0x0003, 0x0005, 0x0008, 0x000d
|
||||
DCW 0x0004, 0x0009, 0x000c, 0x000e
|
||||
DCW 0x000a, 0x000b, 0x000f, 0x0010
|
||||
|
||||
END
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "vp8/encoder/block.h"
|
||||
|
||||
/* Inverse zig-zag scan order holding the values 1..16 once each. Taking the
 * maximum entry over the non-zero coefficients therefore yields the
 * end-of-block count directly. */
static const uint16_t inv_zig_zag[16] = {
  1,  2,  6,  7,  3,  5,  8, 13,
  4,  9, 12, 14, 10, 11, 15, 16
};

/* Quantizes the 16 coefficients of b->coeff with b->round and
 * b->quant_fast, then writes d->qcoeff, d->dqcoeff (qcoeff * d->dequant)
 * and the single byte at d->eob. Every input is loaded before the first
 * store. */
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
  /* Every bit set in every lane: vtst against it flags non-zero lanes. */
  const int16x8_t all_bits = vdupq_n_s16(-1);
  const uint16x8_t scan_lo = vld1q_u16(inv_zig_zag);
  const uint16x8_t scan_hi = vld1q_u16(inv_zig_zag + 8);
  const int16x8_t coef_lo = vld1q_s16(b->coeff);
  const int16x8_t coef_hi = vld1q_s16(b->coeff + 8);
  const int16x8_t bias_lo = vld1q_s16(b->round);
  const int16x8_t bias_hi = vld1q_s16(b->round + 8);
  const int16x8_t mult_lo = vld1q_s16(b->quant_fast);
  const int16x8_t mult_hi = vld1q_s16(b->quant_fast + 8);
  const int16x8_t deq_lo = vld1q_s16(d->dequant);
  const int16x8_t deq_hi = vld1q_s16(d->dequant + 8);
  /* Arithmetic shift by 15: 0 for non-negative lanes, -1 for negative. */
  const int16x8_t sign_lo = vshrq_n_s16(coef_lo, 15);
  const int16x8_t sign_hi = vshrq_n_s16(coef_hi, 15);
  int16x8_t level_lo, level_hi;
  uint16x8_t last;
  uint32x4_t last_wide;
  uint32x2_t eob;

  /* |z| + round */
  level_lo = vaddq_s16(vabsq_s16(coef_lo), bias_lo);
  level_hi = vaddq_s16(vabsq_s16(coef_hi), bias_hi);

  /* vqdmulh yields (2 * x * quant) >> 16; the shift by one removes the
   * doubling. */
  level_lo = vshrq_n_s16(vqdmulhq_s16(level_lo, mult_lo), 1);
  level_hi = vshrq_n_s16(vqdmulhq_s16(level_hi, mult_hi), 1);

  /* (y ^ sign) - sign negates the lanes whose input was negative. */
  level_lo = vsubq_s16(veorq_s16(level_lo, sign_lo), sign_lo);
  level_hi = vsubq_s16(veorq_s16(level_hi, sign_hi), sign_hi);

  /* Keep the scan position of each non-zero lane, then reduce to the
   * largest one: 16 -> 8 -> 4 lanes, widen, 4 -> 2 -> 1. */
  last = vmaxq_u16(vandq_u16(vtstq_s16(level_lo, all_bits), scan_lo),
                   vandq_u16(vtstq_s16(level_hi, all_bits), scan_hi));
  last_wide = vmovl_u16(vmax_u16(vget_low_u16(last), vget_high_u16(last)));
  eob = vmax_u32(vget_low_u32(last_wide), vget_high_u32(last_wide));
  eob = vpmax_u32(eob, eob);

  /* qcoeff first, then dqcoeff, then the eob byte. */
  vst1q_s16(d->qcoeff, level_lo);
  vst1q_s16(d->qcoeff + 8, level_hi);

  vst1q_s16(d->dqcoeff, vmulq_s16(deq_lo, level_lo));
  vst1q_s16(d->dqcoeff + 8, vmulq_s16(deq_hi, level_hi));

  vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob), 0);
}
|
||||
@@ -1,131 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Sum of squared differences between two 16x16 byte blocks. The total is
 * written to *sse and also returned. Two rows are consumed per iteration
 * and four 32-bit accumulators are combined after the loop. */
unsigned int vp8_mse16x16_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int32x4_t acc_a = vdupq_n_s32(0);
    int32x4_t acc_b = vdupq_n_s32(0);
    int32x4_t acc_c = vdupq_n_s32(0);
    int32x4_t acc_d = vdupq_n_s32(0);
    int32x4_t acc_all;
    int64x2_t pair_sums;
    uint32x2_t total;
    int rows_left;

    for (rows_left = 16; rows_left > 0; rows_left -= 2) {
        const unsigned char *const src_next = src_ptr + source_stride;
        const unsigned char *const ref_next = ref_ptr + recon_stride;
        const uint8x16_t src_even = vld1q_u8(src_ptr);
        const uint8x16_t src_odd = vld1q_u8(src_next);
        const uint8x16_t ref_even = vld1q_u8(ref_ptr);
        const uint8x16_t ref_odd = vld1q_u8(ref_next);
        /* Widening subtract; reinterpreted as signed 16-bit differences. */
        const int16x8_t even_lo = vreinterpretq_s16_u16(
                vsubl_u8(vget_low_u8(src_even), vget_low_u8(ref_even)));
        const int16x8_t even_hi = vreinterpretq_s16_u16(
                vsubl_u8(vget_high_u8(src_even), vget_high_u8(ref_even)));
        const int16x8_t odd_lo = vreinterpretq_s16_u16(
                vsubl_u8(vget_low_u8(src_odd), vget_low_u8(ref_odd)));
        const int16x8_t odd_hi = vreinterpretq_s16_u16(
                vsubl_u8(vget_high_u8(src_odd), vget_high_u8(ref_odd)));

        acc_a = vmlal_s16(acc_a, vget_low_s16(even_lo), vget_low_s16(even_lo));
        acc_b = vmlal_s16(acc_b, vget_high_s16(even_lo), vget_high_s16(even_lo));
        acc_c = vmlal_s16(acc_c, vget_low_s16(even_hi), vget_low_s16(even_hi));
        acc_d = vmlal_s16(acc_d, vget_high_s16(even_hi), vget_high_s16(even_hi));

        acc_a = vmlal_s16(acc_a, vget_low_s16(odd_lo), vget_low_s16(odd_lo));
        acc_b = vmlal_s16(acc_b, vget_high_s16(odd_lo), vget_high_s16(odd_lo));
        acc_c = vmlal_s16(acc_c, vget_low_s16(odd_hi), vget_low_s16(odd_hi));
        acc_d = vmlal_s16(acc_d, vget_high_s16(odd_hi), vget_high_s16(odd_hi));

        src_ptr = src_next + source_stride;
        ref_ptr = ref_next + recon_stride;
    }

    /* Horizontal reduction: 4 accumulators -> 4 lanes -> 2 lanes -> 1. */
    acc_all = vaddq_s32(vaddq_s32(acc_a, acc_b), vaddq_s32(acc_c, acc_d));
    pair_sums = vpaddlq_s32(acc_all);
    total = vreinterpret_u32_s64(
            vadd_s64(vget_low_s64(pair_sums), vget_high_s64(pair_sums)));

    vst1_lane_u32((uint32_t *)sse, total, 0);
    return vget_lane_u32(total, 0);
}
|
||||
|
||||
/* Sum of squared differences between two 4x4 byte blocks, returned as an
 * unsigned int.
 * NOTE(review): each vld1_u8 fetches 8 bytes per row although only the
 * first 4 contribute to the result -- confirm callers tolerate the extra
 * 4-byte read on every row. */
unsigned int vp8_get4x4sse_cs_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride) {
    int32x4_t row_sq[4];
    int32x4_t all_sq;
    int64x2_t pair_sums;
    int64x1_t total;
    int row;

    for (row = 0; row < 4; ++row) {
        /* Widening subtract, viewed as signed; keep the low four lanes. */
        const int16x8_t diff = vreinterpretq_s16_u16(
                vsubl_u8(vld1_u8(src_ptr), vld1_u8(ref_ptr)));
        const int16x4_t diff4 = vget_low_s16(diff);

        row_sq[row] = vmull_s16(diff4, diff4);
        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }

    /* Horizontal reduction: 4 rows -> 4 lanes -> 2 lanes -> 1. */
    all_sq = vaddq_s32(vaddq_s32(row_sq[0], row_sq[1]),
                       vaddq_s32(row_sq[2], row_sq[3]));
    pair_sums = vpaddlq_s32(all_sq);
    total = vadd_s64(vget_low_s64(pair_sums), vget_high_s64(pair_sums));

    return vget_lane_u32(vreinterpret_u32_s64(total), 0);
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include <math.h>
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp8/encoder/quantize.h"
|
||||
#include "vp8/common/entropy.h"
|
||||
|
||||
|
||||
#if HAVE_NEON
|
||||
|
||||
/* vp8_quantize_mbX functions here differ from the corresponding ones in
|
||||
* quantize.c only by using quantize_b_pair function pointer instead of
|
||||
* the regular quantize_b function pointer */
|
||||
/* Quantizes blocks 0..15 two at a time through x->quantize_b_pair, then
 * block 24 through x->quantize_b unless the macroblock mode is B_PRED or
 * SPLITMV. The mode is sampled before any block is quantized. */
void vp8_quantize_mby_neon(MACROBLOCK *x)
{
    const int no_2nd_order =
        (x->e_mbd.mode_info_context->mbmi.mode == B_PRED
         || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV);
    int blk;

    for (blk = 0; blk < 16; blk += 2)
    {
        x->quantize_b_pair(x->block + blk, x->block + blk + 1,
                           x->e_mbd.block + blk, x->e_mbd.block + blk + 1);
    }

    if (no_2nd_order)
        return;

    x->quantize_b(x->block + 24, x->e_mbd.block + 24);
}
|
||||
|
||||
void vp8_quantize_mb_neon(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
||||
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
|
||||
|
||||
for (i = 0; i < 24; i+=2)
|
||||
x->quantize_b_pair(&x->block[i], &x->block[i+1],
|
||||
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
|
||||
|
||||
if (has_2nd_order)
|
||||
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
||||
}
|
||||
|
||||
|
||||
void vp8_quantize_mbuv_neon(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 16; i < 24; i+=2)
|
||||
x->quantize_b_pair(&x->block[i], &x->block[i+1],
|
||||
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
|
||||
}
|
||||
|
||||
#endif /* HAVE_NEON */
|
||||
@@ -159,7 +159,7 @@ static void write_split(vp8_writer *bc, int x)
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
|
||||
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
|
||||
{
|
||||
const TOKENEXTRA *stop = p + xcount;
|
||||
unsigned int split;
|
||||
@@ -374,7 +374,7 @@ static void write_partition_size(unsigned char *cx_data, int size)
|
||||
|
||||
}
|
||||
|
||||
static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
unsigned char * cx_data_end,
|
||||
int num_part)
|
||||
{
|
||||
@@ -398,7 +398,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
|
||||
int tokens = (int)(stop - p);
|
||||
|
||||
vp8_pack_tokens_c(w, p, tokens);
|
||||
vp8_pack_tokens(w, p, tokens);
|
||||
}
|
||||
|
||||
vp8_stop_encode(w);
|
||||
@@ -407,7 +407,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
}
|
||||
|
||||
|
||||
static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
|
||||
static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w)
|
||||
{
|
||||
int mb_row;
|
||||
|
||||
@@ -417,7 +417,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
|
||||
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
|
||||
int tokens = (int)(stop - p);
|
||||
|
||||
vp8_pack_tokens_c(w, p, tokens);
|
||||
vp8_pack_tokens(w, p, tokens);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1543,7 +1543,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
|
||||
if (pc->refresh_entropy_probs == 0)
|
||||
{
|
||||
/* save a copy for later refresh */
|
||||
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
}
|
||||
|
||||
vp8_update_coef_probs(cpi);
|
||||
@@ -1620,7 +1620,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
|
||||
/* concatenate partition buffers */
|
||||
for(i = 0; i < num_part; i++)
|
||||
{
|
||||
vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
|
||||
memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
|
||||
cpi->partition_d[i+1] = dp;
|
||||
dp += cpi->partition_sz[i+1];
|
||||
}
|
||||
@@ -1676,7 +1676,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
|
||||
pack_mb_row_tokens(cpi, &cpi->bc[1]);
|
||||
else
|
||||
#endif
|
||||
pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
|
||||
vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
|
||||
|
||||
vp8_stop_encode(&cpi->bc[1]);
|
||||
|
||||
|
||||
@@ -16,36 +16,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if HAVE_EDSP
|
||||
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
|
||||
unsigned char * cx_data,
|
||||
const unsigned char *cx_data_end,
|
||||
int num_parts,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
# define pack_tokens(a,b,c) \
|
||||
vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
# define pack_tokens_into_partitions(a,b,c,d) \
|
||||
vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
# define pack_mb_row_tokens(a,b) \
|
||||
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
#else
|
||||
|
||||
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
|
||||
|
||||
# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
|
||||
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
|
||||
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
|
||||
#endif
|
||||
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -126,6 +126,8 @@ typedef struct macroblock
|
||||
|
||||
int optimize;
|
||||
int q_index;
|
||||
int is_skin;
|
||||
int denoise_zeromv;
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
int increase_denoising;
|
||||
@@ -161,8 +163,9 @@ typedef struct macroblock
|
||||
void (*short_fdct8x4)(short *input, short *output, int pitch);
|
||||
void (*short_walsh4x4)(short *input, short *output, int pitch);
|
||||
void (*quantize_b)(BLOCK *b, BLOCKD *d);
|
||||
void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
|
||||
|
||||
unsigned int mbs_zero_last_dot_suppress;
|
||||
int zero_last_dot_suppress;
|
||||
} MACROBLOCK;
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -374,7 +374,7 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
|
||||
} else if (mode == 3) {
|
||||
denoiser->denoiser_mode = kDenoiserOnYUVAggressive;
|
||||
} else {
|
||||
denoiser->denoiser_mode = kDenoiserOnAdaptive;
|
||||
denoiser->denoiser_mode = kDenoiserOnYUV;
|
||||
}
|
||||
if (denoiser->denoiser_mode != kDenoiserOnYUVAggressive) {
|
||||
denoiser->denoise_pars.scale_sse_thresh = 1;
|
||||
@@ -390,10 +390,10 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
|
||||
denoiser->denoise_pars.scale_motion_thresh = 16;
|
||||
denoiser->denoise_pars.scale_increase_filter = 1;
|
||||
denoiser->denoise_pars.denoise_mv_bias = 60;
|
||||
denoiser->denoise_pars.pickmode_mv_bias = 60;
|
||||
denoiser->denoise_pars.qp_thresh = 100;
|
||||
denoiser->denoise_pars.consec_zerolast = 10;
|
||||
denoiser->denoise_pars.spatial_blur = 20;
|
||||
denoiser->denoise_pars.pickmode_mv_bias = 75;
|
||||
denoiser->denoise_pars.qp_thresh = 80;
|
||||
denoiser->denoise_pars.consec_zerolast = 15;
|
||||
denoiser->denoise_pars.spatial_blur = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -415,8 +415,8 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
|
||||
denoiser->yv12_running_avg[i].frame_size);
|
||||
memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
|
||||
denoiser->yv12_running_avg[i].frame_size);
|
||||
|
||||
}
|
||||
denoiser->yv12_mc_running_avg.flags = 0;
|
||||
@@ -428,19 +428,19 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
|
||||
return 1;
|
||||
}
|
||||
|
||||
vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
|
||||
denoiser->yv12_mc_running_avg.frame_size);
|
||||
memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
|
||||
denoiser->yv12_mc_running_avg.frame_size);
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width,
|
||||
height, VP8BORDERINPIXELS) < 0) {
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
vpx_memset(denoiser->yv12_last_source.buffer_alloc, 0,
|
||||
denoiser->yv12_last_source.frame_size);
|
||||
memset(denoiser->yv12_last_source.buffer_alloc, 0,
|
||||
denoiser->yv12_last_source.frame_size);
|
||||
|
||||
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
|
||||
vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
|
||||
memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
|
||||
vp8_denoiser_set_parameters(denoiser, mode);
|
||||
denoiser->nmse_source_diff = 0;
|
||||
denoiser->nmse_source_diff_count = 0;
|
||||
@@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
|
||||
// Bitrate thresholds and noise metric (nmse) thresholds for switching to
|
||||
// aggressive mode.
|
||||
// TODO(marpan): Adjust thresholds, including effect on resolution.
|
||||
denoiser->bitrate_threshold = 200000; // (bits/sec).
|
||||
denoiser->threshold_aggressive_mode = 35;
|
||||
if (width * height > 640 * 480) {
|
||||
denoiser->bitrate_threshold = 500000;
|
||||
denoiser->threshold_aggressive_mode = 100;
|
||||
denoiser->bitrate_threshold = 400000; // (bits/sec).
|
||||
denoiser->threshold_aggressive_mode = 80;
|
||||
if (width * height > 1280 * 720) {
|
||||
denoiser->bitrate_threshold = 3000000;
|
||||
denoiser->threshold_aggressive_mode = 200;
|
||||
} else if (width * height > 960 * 540) {
|
||||
denoiser->bitrate_threshold = 800000;
|
||||
denoiser->threshold_aggressive_mode = 150;
|
||||
} else if (width * height > 1280 * 720) {
|
||||
denoiser->bitrate_threshold = 2000000;
|
||||
denoiser->threshold_aggressive_mode = 1400;
|
||||
denoiser->bitrate_threshold = 1200000;
|
||||
denoiser->threshold_aggressive_mode = 120;
|
||||
} else if (width * height > 640 * 480) {
|
||||
denoiser->bitrate_threshold = 600000;
|
||||
denoiser->threshold_aggressive_mode = 100;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -483,7 +483,6 @@ void vp8_denoiser_free(VP8_DENOISER *denoiser)
|
||||
vpx_free(denoiser->denoise_state);
|
||||
}
|
||||
|
||||
|
||||
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
MACROBLOCK *x,
|
||||
unsigned int best_sse,
|
||||
@@ -554,6 +553,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
* Note that any changes to the mode info only affects the
|
||||
* denoising.
|
||||
*/
|
||||
x->denoise_zeromv = 1;
|
||||
mbmi->ref_frame =
|
||||
x->best_zeromv_reference_frame;
|
||||
|
||||
@@ -603,6 +603,12 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
motion_threshold = denoiser->denoise_pars.scale_motion_thresh *
|
||||
NOISE_MOTION_THRESHOLD;
|
||||
|
||||
// If block is considered to be skin area, lower the motion threshold.
|
||||
// In current version set threshold = 1, so only denoise very low
|
||||
// (i.e., zero) mv on skin.
|
||||
if (x->is_skin)
|
||||
motion_threshold = 1;
|
||||
|
||||
if (motion_magnitude2 <
|
||||
denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
|
||||
x->increase_denoising = 1;
|
||||
@@ -662,6 +668,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
/* No filtering of this block; it differs too much from the predictor,
|
||||
* or the motion vector magnitude is considered too big.
|
||||
*/
|
||||
x->denoise_zeromv = 0;
|
||||
vp8_copy_mem16x16(
|
||||
x->thismb, 16,
|
||||
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
|
||||
@@ -692,7 +699,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
int uv_stride =denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
|
||||
|
||||
// Fix filter level to some nominal value for now.
|
||||
int filter_level = 32;
|
||||
int filter_level = 48;
|
||||
|
||||
int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
|
||||
@@ -19,7 +19,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
|
||||
#define SUM_DIFF_THRESHOLD_HIGH (600)
|
||||
#define SUM_DIFF_THRESHOLD_HIGH (600) // ~(16 * 16 * 1.5)
|
||||
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
|
||||
|
||||
#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)
|
||||
@@ -27,6 +27,8 @@ extern "C" {
|
||||
#define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 8)
|
||||
#define MOTION_MAGNITUDE_THRESHOLD_UV (8*3)
|
||||
|
||||
#define MAX_GF_ARF_DENOISE_RANGE (8)
|
||||
|
||||
enum vp8_denoiser_decision
|
||||
{
|
||||
COPY_BLOCK,
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "encodemb.h"
|
||||
#include "encodemv.h"
|
||||
#include "vp8/common/common.h"
|
||||
@@ -82,6 +83,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
|
||||
{
|
||||
unsigned int act;
|
||||
unsigned int sse;
|
||||
(void)cpi;
|
||||
/* TODO: This could also be done over smaller areas (8x8), but that would
|
||||
* require extensive changes elsewhere, as lambda is assumed to be fixed
|
||||
* over an entire MB in most of the code.
|
||||
@@ -89,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
|
||||
* lambda using a non-linear combination (e.g., the smallest, or second
|
||||
* smallest, etc.).
|
||||
*/
|
||||
act = vp8_variance16x16(x->src.y_buffer,
|
||||
act = vpx_variance16x16(x->src.y_buffer,
|
||||
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
|
||||
act = act<<4;
|
||||
|
||||
@@ -154,8 +156,8 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
|
||||
cpi->common.MBs));
|
||||
|
||||
/* Copy map to sort list */
|
||||
vpx_memcpy( sortlist, cpi->mb_activity_map,
|
||||
sizeof(unsigned int) * cpi->common.MBs );
|
||||
memcpy( sortlist, cpi->mb_activity_map,
|
||||
sizeof(unsigned int) * cpi->common.MBs );
|
||||
|
||||
|
||||
/* Ripple each value down to its correct position */
|
||||
@@ -522,7 +524,8 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
}
|
||||
|
||||
#endif
|
||||
// Keep track of how many (consecutive) times a block is coded
|
||||
|
||||
// Keep track of how many (consecutive) times a block is coded
|
||||
// as ZEROMV_LASTREF, for base layer frames.
|
||||
// Reset to 0 if its coded as anything else.
|
||||
if (cpi->current_layer == 0) {
|
||||
@@ -531,9 +534,14 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// Increment, check for wrap-around.
|
||||
if (cpi->consec_zero_last[map_index+mb_col] < 255)
|
||||
cpi->consec_zero_last[map_index+mb_col] += 1;
|
||||
if (cpi->consec_zero_last_mvbias[map_index+mb_col] < 255)
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] += 1;
|
||||
} else {
|
||||
cpi->consec_zero_last[map_index+mb_col] = 0;
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
|
||||
}
|
||||
if (x->zero_last_dot_suppress)
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
|
||||
}
|
||||
|
||||
/* Special case code for cyclic refresh
|
||||
@@ -574,7 +582,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
/* pack tokens for this MB */
|
||||
{
|
||||
int tok_count = *tp - tp_start;
|
||||
pack_tokens(w, tp_start, tok_count);
|
||||
vp8_pack_tokens(w, tp_start, tok_count);
|
||||
}
|
||||
#endif
|
||||
/* Increment pointer into gf usage flags structure. */
|
||||
@@ -658,8 +666,7 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)
|
||||
|
||||
x->mvc = cm->fc.mvc;
|
||||
|
||||
vpx_memset(cm->above_context, 0,
|
||||
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
|
||||
memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
|
||||
|
||||
/* Special case treatment when GF and ARF are not sensible options
|
||||
* for reference
|
||||
@@ -737,7 +744,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
const int num_part = (1 << cm->multi_token_partition);
|
||||
#endif
|
||||
|
||||
vpx_memset(segment_counts, 0, sizeof(segment_counts));
|
||||
memset(segment_counts, 0, sizeof(segment_counts));
|
||||
totalrate = 0;
|
||||
|
||||
if (cpi->compressor_speed == 2)
|
||||
@@ -967,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
int i;
|
||||
|
||||
/* Set to defaults */
|
||||
vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
|
||||
memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
|
||||
|
||||
tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
|
||||
|
||||
@@ -1143,6 +1150,8 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
while (++b < 16);
|
||||
}
|
||||
|
||||
#else
|
||||
(void)cpi;
|
||||
#endif
|
||||
|
||||
++x->ymode_count[m];
|
||||
@@ -1252,7 +1261,6 @@ int vp8cx_encode_inter_macroblock
|
||||
if(cpi->sf.use_fastquant_for_pick)
|
||||
{
|
||||
x->quantize_b = vp8_fast_quantize_b;
|
||||
x->quantize_b_pair = vp8_fast_quantize_b_pair;
|
||||
|
||||
/* the fast quantizer does not use zbin_extra, so
|
||||
* do not recalculate */
|
||||
@@ -1265,7 +1273,6 @@ int vp8cx_encode_inter_macroblock
|
||||
if (cpi->sf.improved_quant)
|
||||
{
|
||||
x->quantize_b = vp8_regular_quantize_b;
|
||||
x->quantize_b_pair = vp8_regular_quantize_b_pair;
|
||||
}
|
||||
|
||||
/* restore cpi->zbin_mode_boost_enabled */
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "quantize.h"
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
#include "encodemb.h"
|
||||
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
|
||||
}
|
||||
}
|
||||
|
||||
intra_pred_var = vp8_get_mb_ss(x->src_diff);
|
||||
intra_pred_var = vpx_get_mb_ss(x->src_diff);
|
||||
|
||||
return intra_pred_var;
|
||||
}
|
||||
|
||||
@@ -506,8 +506,8 @@ static void optimize_mb(MACROBLOCK *x)
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
|
||||
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -555,8 +555,8 @@ void vp8_optimize_mby(MACROBLOCK *x)
|
||||
if (!x->e_mbd.left_context)
|
||||
return;
|
||||
|
||||
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -595,8 +595,8 @@ void vp8_optimize_mbuv(MACROBLOCK *x)
|
||||
if (!x->e_mbd.left_context)
|
||||
return;
|
||||
|
||||
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
|
||||
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
|
||||
|
||||
extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
|
||||
|
||||
static THREAD_FUNCTION thread_loopfilter(void *p_data)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
|
||||
@@ -215,11 +213,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
LAST_FRAME) {
|
||||
// Increment, check for wrap-around.
|
||||
if (cpi->consec_zero_last[map_index+mb_col] < 255)
|
||||
cpi->consec_zero_last[map_index+mb_col] +=
|
||||
1;
|
||||
cpi->consec_zero_last[map_index+mb_col] += 1;
|
||||
if (cpi->consec_zero_last_mvbias[map_index+mb_col] < 255)
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] += 1;
|
||||
} else {
|
||||
cpi->consec_zero_last[map_index+mb_col] = 0;
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
|
||||
}
|
||||
if (x->zero_last_dot_suppress)
|
||||
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
|
||||
}
|
||||
|
||||
/* Special case code for cyclic refresh
|
||||
@@ -261,7 +263,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
/* pack tokens for this MB */
|
||||
{
|
||||
int tok_count = tp - tp_start;
|
||||
pack_tokens(w, tp_start, tok_count);
|
||||
vp8_pack_tokens(w, tp_start, tok_count);
|
||||
}
|
||||
#else
|
||||
cpi->tplist[mb_row].stop = tp;
|
||||
@@ -346,7 +348,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
z->short_fdct8x4 = x->short_fdct8x4;
|
||||
z->short_walsh4x4 = x->short_walsh4x4;
|
||||
z->quantize_b = x->quantize_b;
|
||||
z->quantize_b_pair = x->quantize_b_pair;
|
||||
z->optimize = x->optimize;
|
||||
|
||||
/*
|
||||
@@ -413,14 +414,13 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
|
||||
zd->segmentation_enabled = xd->segmentation_enabled;
|
||||
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
|
||||
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
|
||||
sizeof(xd->segment_feature_data));
|
||||
memcpy(zd->segment_feature_data, xd->segment_feature_data,
|
||||
sizeof(xd->segment_feature_data));
|
||||
|
||||
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
|
||||
sizeof(xd->dequant_y1_dc));
|
||||
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
|
||||
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
|
||||
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
|
||||
memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
|
||||
memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
|
||||
memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
|
||||
memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
|
||||
|
||||
#if 1
|
||||
/*TODO: Remove dequant from BLOCKD. This is a temporary solution until
|
||||
@@ -435,15 +435,14 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
#endif
|
||||
|
||||
|
||||
vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
|
||||
vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult,
|
||||
sizeof(x->rd_thresh_mult));
|
||||
memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
|
||||
memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult));
|
||||
|
||||
z->zbin_over_quant = x->zbin_over_quant;
|
||||
z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
|
||||
z->zbin_mode_boost = x->zbin_mode_boost;
|
||||
|
||||
vpx_memset(z->error_bins, 0, sizeof(z->error_bins));
|
||||
memset(z->error_bins, 0, sizeof(z->error_bins));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -469,7 +468,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
|
||||
mb->gf_active_ptr = x->gf_active_ptr;
|
||||
|
||||
vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
|
||||
memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
|
||||
mbr_ei[i].totalrate = 0;
|
||||
|
||||
mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
|
||||
@@ -506,6 +505,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
mb->intra_error = 0;
|
||||
vp8_zero(mb->count_mb_ref_frame_usage);
|
||||
mb->mbs_tested_so_far = 0;
|
||||
mb->mbs_zero_last_dot_suppress = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -543,7 +543,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
vpx_malloc(sizeof(sem_t) * th_count));
|
||||
CHECK_MEM_ERROR(cpi->mb_row_ei,
|
||||
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
|
||||
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
|
||||
memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
|
||||
CHECK_MEM_ERROR(cpi->en_thread_data,
|
||||
vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "block.h"
|
||||
#include "onyx_int.h"
|
||||
@@ -34,8 +35,6 @@
|
||||
/* #define OUTPUT_FPF 1 */
|
||||
|
||||
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
|
||||
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
|
||||
extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
|
||||
|
||||
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
|
||||
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
|
||||
@@ -132,6 +131,7 @@ static void output_stats(const VP8_COMP *cpi,
|
||||
FIRSTPASS_STATS *stats)
|
||||
{
|
||||
struct vpx_codec_cx_pkt pkt;
|
||||
(void)cpi;
|
||||
pkt.kind = VPX_CODEC_STATS_PKT;
|
||||
pkt.data.twopass_stats.buf = stats;
|
||||
pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
|
||||
@@ -418,18 +418,19 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
|
||||
int raw_stride = raw_buffer->y_stride;
|
||||
unsigned char *ref_ptr;
|
||||
int ref_stride = x->e_mbd.pre.y_stride;
|
||||
(void)cpi;
|
||||
|
||||
/* Set up pointers for this macro block raw buffer */
|
||||
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
|
||||
+ d->offset);
|
||||
vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
|
||||
(unsigned int *)(raw_motion_err));
|
||||
vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
|
||||
(unsigned int *)(raw_motion_err));
|
||||
|
||||
/* Set up pointers for this macro block recon buffer */
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
|
||||
vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
|
||||
(unsigned int *)(best_motion_err));
|
||||
vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
|
||||
(unsigned int *)(best_motion_err));
|
||||
}
|
||||
|
||||
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
@@ -453,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
int new_mv_mode_penalty = 256;
|
||||
|
||||
/* override the default variance function to use MSE */
|
||||
v_fn_ptr.vf = vp8_mse16x16;
|
||||
v_fn_ptr.vf = vpx_mse16x16;
|
||||
|
||||
/* Set up pointers for this macro block recon buffer */
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
@@ -571,7 +572,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
{
|
||||
int flag[2] = {1, 1};
|
||||
vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
|
||||
vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
|
||||
}
|
||||
|
||||
@@ -1327,8 +1328,6 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
return Q;
|
||||
}
|
||||
|
||||
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
|
||||
|
||||
void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
FIRSTPASS_STATS this_frame;
|
||||
@@ -1409,6 +1408,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
|
||||
void vp8_end_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
(void)cpi;
|
||||
}
|
||||
|
||||
/* This function gives and estimate of how badly we believe the prediction
|
||||
@@ -1419,6 +1419,7 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra
|
||||
double prediction_decay_rate;
|
||||
double motion_decay;
|
||||
double motion_pct = next_frame->pcnt_motion;
|
||||
(void)cpi;
|
||||
|
||||
/* Initial basis is the % mbs inter coded */
|
||||
prediction_decay_rate = next_frame->pcnt_inter;
|
||||
@@ -1547,6 +1548,7 @@ static void accumulate_frame_motion_stats(
|
||||
double this_frame_mvr_ratio;
|
||||
double this_frame_mvc_ratio;
|
||||
double motion_pct;
|
||||
(void)cpi;
|
||||
|
||||
/* Accumulate motion stats. */
|
||||
motion_pct = this_frame->pcnt_motion;
|
||||
@@ -1774,7 +1776,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
start_pos = cpi->twopass.stats_in;
|
||||
|
||||
vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
|
||||
memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
|
||||
|
||||
/* Load stats for the current frame. */
|
||||
mod_frame_err = calculate_modified_err(cpi, this_frame);
|
||||
@@ -1870,7 +1872,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
break;
|
||||
}
|
||||
|
||||
vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame));
|
||||
memcpy(this_frame, &next_frame, sizeof(*this_frame));
|
||||
|
||||
old_boost_score = boost_score;
|
||||
}
|
||||
@@ -2440,7 +2442,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
if (cpi->twopass.frames_to_key == 0)
|
||||
{
|
||||
/* Define next KF group and assign bits to it */
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
find_next_key_frame(cpi, &this_frame_copy);
|
||||
|
||||
/* Special case: Error error_resilient_mode mode does not make much
|
||||
@@ -2466,7 +2468,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
if (cpi->frames_till_gf_update_due == 0)
|
||||
{
|
||||
/* Define next gf group and assign bits to it */
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
define_gf_group(cpi, &this_frame_copy);
|
||||
|
||||
/* If we are going to code an altref frame at the end of the group
|
||||
@@ -2482,7 +2484,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
* to the GF group
|
||||
*/
|
||||
int bak = cpi->per_frame_bandwidth;
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
assign_std_frame_bits(cpi, &this_frame_copy);
|
||||
cpi->per_frame_bandwidth = bak;
|
||||
}
|
||||
@@ -2505,14 +2507,14 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
if (cpi->common.frame_type != KEY_FRAME)
|
||||
{
|
||||
/* Assign bits from those allocated to the GF group */
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
assign_std_frame_bits(cpi, &this_frame_copy);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Assign bits from those allocated to the GF group */
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
assign_std_frame_bits(cpi, &this_frame_copy);
|
||||
}
|
||||
}
|
||||
@@ -2653,7 +2655,7 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
|
||||
double decay_accumulator = 1.0;
|
||||
double next_iiratio;
|
||||
|
||||
vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
|
||||
memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
|
||||
|
||||
/* Note the starting file position so we can reset to it */
|
||||
start_pos = cpi->twopass.stats_in;
|
||||
@@ -2730,7 +2732,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double kf_group_coded_err = 0.0;
|
||||
double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
|
||||
|
||||
vpx_memset(&next_frame, 0, sizeof(next_frame));
|
||||
memset(&next_frame, 0, sizeof(next_frame));
|
||||
|
||||
vp8_clear_system_state();
|
||||
start_position = cpi->twopass.stats_in;
|
||||
@@ -2751,7 +2753,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
cpi->twopass.frames_to_key = 1;
|
||||
|
||||
/* Take a copy of the initial frame details */
|
||||
vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
|
||||
memcpy(&first_frame, this_frame, sizeof(*this_frame));
|
||||
|
||||
cpi->twopass.kf_group_bits = 0;
|
||||
cpi->twopass.kf_group_error_left = 0;
|
||||
@@ -2774,7 +2776,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
kf_group_coded_err += this_frame->coded_error;
|
||||
|
||||
/* Load the next frame's stats. */
|
||||
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
|
||||
memcpy(&last_frame, this_frame, sizeof(*this_frame));
|
||||
input_stats(cpi, this_frame);
|
||||
|
||||
/* Provided that we are not at the end of the file... */
|
||||
@@ -2842,7 +2844,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
cpi->twopass.frames_to_key /= 2;
|
||||
|
||||
/* Copy first frame details */
|
||||
vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
|
||||
memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
|
||||
|
||||
/* Reset to the start of the group */
|
||||
reset_fpf_position(cpi, start_position);
|
||||
@@ -2964,7 +2966,6 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
*/
|
||||
decay_accumulator = 1.0;
|
||||
boost_score = 0.0;
|
||||
loop_decay_rate = 1.00; /* Starting decay rate */
|
||||
|
||||
for (i = 0 ; i < cpi->twopass.frames_to_key ; i++)
|
||||
{
|
||||
@@ -3208,7 +3209,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
int new_width = cpi->oxcf.Width;
|
||||
int new_height = cpi->oxcf.Height;
|
||||
|
||||
int projected_buffer_level = (int)cpi->buffer_level;
|
||||
int projected_buffer_level;
|
||||
int tmp_q;
|
||||
|
||||
double projected_bits_perframe;
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "onyx_int.h"
|
||||
#include "mcomp.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
@@ -888,6 +890,8 @@ int vp8_hex_search
|
||||
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
|
||||
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
|
||||
|
||||
(void)mvcost;
|
||||
|
||||
/* adjust ref_mv to make sure it is within MV range */
|
||||
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
|
||||
br = ref_mv->as_mv.row;
|
||||
@@ -898,7 +902,7 @@ int vp8_hex_search
|
||||
this_offset = base_offset + (br * (pre_stride)) + bc;
|
||||
this_mv.as_mv.row = br;
|
||||
this_mv.as_mv.col = bc;
|
||||
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
|
||||
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
|
||||
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
@@ -911,6 +915,8 @@ int vp8_hex_search
|
||||
else if (search_param >= 1) hex_range = 63;
|
||||
|
||||
dia_range = 8;
|
||||
#else
|
||||
(void)search_param;
|
||||
#endif
|
||||
|
||||
/* hex search */
|
||||
@@ -923,7 +929,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.row = br + hex[i].row;
|
||||
this_mv.as_mv.col = bc + hex[i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -934,7 +940,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.col = bc + hex[i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -960,7 +966,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.row = br + next_chkpts[k][i].row;
|
||||
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -971,7 +977,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -1002,7 +1008,7 @@ cal_neighbors:
|
||||
this_mv.as_mv.row = br + neighbors[i].row;
|
||||
this_mv.as_mv.col = bc + neighbors[i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -1013,7 +1019,7 @@ cal_neighbors:
|
||||
this_mv.as_mv.col = bc + neighbors[i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -1097,7 +1103,7 @@ int vp8_diamond_search_sad_c
|
||||
best_address = in_what;
|
||||
|
||||
/* Check the starting position */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* search_param determines the length of the initial step and hence
|
||||
@@ -1122,7 +1128,7 @@ int vp8_diamond_search_sad_c
|
||||
|
||||
{
|
||||
check_here = ss[i].offset + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1221,7 +1227,7 @@ int vp8_diamond_search_sadx4
|
||||
best_address = in_what;
|
||||
|
||||
/* Check the starting position */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* search_param determines the length of the initial step and hence the
|
||||
@@ -1289,7 +1295,7 @@ int vp8_diamond_search_sadx4
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = ss[i].offset + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1372,8 +1378,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
|
||||
in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that
|
||||
@@ -1398,7 +1403,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
for (c = col_min; c < col_max; c++)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
|
||||
this_mv.as_mv.col = c;
|
||||
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
|
||||
@@ -1470,8 +1475,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
|
||||
in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that stretch
|
||||
@@ -1527,7 +1531,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1586,7 +1590,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
|
||||
// TODO(johannkoenig): check if this alignment is necessary.
|
||||
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
|
||||
unsigned int sad_array[3];
|
||||
|
||||
int *mvsadcost[2];
|
||||
@@ -1605,8 +1610,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride,
|
||||
bestaddress, in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that stretch
|
||||
@@ -1692,7 +1696,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1750,8 +1754,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
|
||||
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
|
||||
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
|
||||
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address,
|
||||
in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
|
||||
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
|
||||
|
||||
for (i=0; i<search_range; i++)
|
||||
@@ -1767,7 +1770,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1830,8 +1833,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
|
||||
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
|
||||
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address,
|
||||
in_what_stride, UINT_MAX)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
|
||||
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
|
||||
|
||||
for (i=0; i<search_range; i++)
|
||||
@@ -1882,7 +1884,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1974,8 +1976,8 @@ void print_mode_context(void)
|
||||
#ifdef VP8_ENTROPY_STATS
|
||||
void init_mv_ref_counts()
|
||||
{
|
||||
vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
|
||||
vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
|
||||
memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
|
||||
memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
|
||||
}
|
||||
|
||||
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "modecosts.h"
|
||||
#include "onyx_int.h"
|
||||
#include "treewriter.h"
|
||||
#include "vp8/common/entropymode.h"
|
||||
|
||||
@@ -16,7 +16,9 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_init_mode_costs(VP8_COMP *x);
|
||||
struct VP8_COMP;
|
||||
|
||||
void vp8_init_mode_costs(struct VP8_COMP *x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+380
-131
@@ -11,6 +11,8 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "onyx_int.h"
|
||||
@@ -427,10 +429,10 @@ static void setup_features(VP8_COMP *cpi)
|
||||
|
||||
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0;
|
||||
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
|
||||
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
|
||||
set_default_lf_deltas(cpi);
|
||||
|
||||
@@ -507,7 +509,7 @@ static void disable_segmentation(VP8_COMP *cpi)
|
||||
static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
|
||||
{
|
||||
/* Copy in the new segmentation map */
|
||||
vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
|
||||
memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
|
||||
|
||||
/* Signal that the map should be updated. */
|
||||
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
|
||||
@@ -529,7 +531,7 @@ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
|
||||
static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta)
|
||||
{
|
||||
cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta;
|
||||
vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
|
||||
memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
|
||||
}
|
||||
|
||||
|
||||
@@ -579,11 +581,32 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
|
||||
|
||||
cpi->cyclic_refresh_q = Q / 2;
|
||||
|
||||
if (cpi->oxcf.screen_content_mode) {
|
||||
// Modify quality ramp-up based on Q. Above some Q level, increase the
|
||||
// number of blocks to be refreshed, and reduce it below the threshold.
|
||||
// Turn-off under certain conditions (i.e., away from key frame, and if
|
||||
// we are at good quality (low Q) and most of the blocks were skipped-encoded
|
||||
// in previous frame).
|
||||
int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 80 : 100;
|
||||
if (Q >= qp_thresh) {
|
||||
cpi->cyclic_refresh_mode_max_mbs_perframe =
|
||||
(cpi->common.mb_rows * cpi->common.mb_cols) / 10;
|
||||
} else if (cpi->frames_since_key > 250 &&
|
||||
Q < 20 &&
|
||||
cpi->mb.skip_true_count > (int)(0.95 * mbs_in_frame)) {
|
||||
cpi->cyclic_refresh_mode_max_mbs_perframe = 0;
|
||||
} else {
|
||||
cpi->cyclic_refresh_mode_max_mbs_perframe =
|
||||
(cpi->common.mb_rows * cpi->common.mb_cols) / 20;
|
||||
}
|
||||
block_count = cpi->cyclic_refresh_mode_max_mbs_perframe;
|
||||
}
|
||||
|
||||
// Set every macroblock to be eligible for update.
|
||||
// For key frame this will reset seg map to 0.
|
||||
vpx_memset(cpi->segmentation_map, 0, mbs_in_frame);
|
||||
memset(cpi->segmentation_map, 0, mbs_in_frame);
|
||||
|
||||
if (cpi->common.frame_type != KEY_FRAME)
|
||||
if (cpi->common.frame_type != KEY_FRAME && block_count > 0)
|
||||
{
|
||||
/* Cycle through the macro_block rows */
|
||||
/* MB loop to set local segmentation map */
|
||||
@@ -617,15 +640,18 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0) {
|
||||
if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive &&
|
||||
Q < (int)cpi->denoiser.denoise_pars.qp_thresh) {
|
||||
Q < (int)cpi->denoiser.denoise_pars.qp_thresh &&
|
||||
(cpi->frames_since_key >
|
||||
2 * cpi->denoiser.denoise_pars.consec_zerolast)) {
|
||||
// Under aggressive denoising, use segmentation to turn off loop
|
||||
// filter below some qp thresh. The filter is turned off for all
|
||||
// filter below some qp thresh. The filter is reduced for all
|
||||
// blocks that have been encoded as ZEROMV LAST x frames in a row,
|
||||
// where x is set by cpi->denoiser.denoise_pars.consec_zerolast.
|
||||
// This is to avoid "dot" artifacts that can occur from repeated
|
||||
// loop filtering on noisy input source.
|
||||
cpi->cyclic_refresh_q = Q;
|
||||
lf_adjustment = -MAX_LOOP_FILTER;
|
||||
// lf_adjustment = -MAX_LOOP_FILTER;
|
||||
lf_adjustment = -40;
|
||||
for (i = 0; i < mbs_in_frame; ++i) {
|
||||
seg_map[i] = (cpi->consec_zero_last[i] >
|
||||
cpi->denoiser.denoise_pars.consec_zerolast) ? 1 : 0;
|
||||
@@ -662,8 +688,8 @@ static void set_default_lf_deltas(VP8_COMP *cpi)
|
||||
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
|
||||
cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
|
||||
|
||||
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
|
||||
memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
|
||||
|
||||
/* Test of ref frame deltas */
|
||||
cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2;
|
||||
@@ -786,6 +812,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
cpi->mb.mbs_tested_so_far = 0;
|
||||
cpi->mb.mbs_zero_last_dot_suppress = 0;
|
||||
|
||||
/* best quality defaults */
|
||||
sf->RD = 1;
|
||||
@@ -853,6 +880,25 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_SPLIT2] =
|
||||
sf->thresh_mult[THR_SPLIT3] = speed_map(Speed, thresh_mult_map_split2);
|
||||
|
||||
// Special case for temporal layers.
|
||||
// Reduce the thresholds for zero/nearest/near for GOLDEN, if GOLDEN is
|
||||
// used as second reference. We don't modify thresholds for ALTREF case
|
||||
// since ALTREF is usually used as long-term reference in temporal layers.
|
||||
if ((cpi->Speed <= 6) &&
|
||||
(cpi->oxcf.number_of_layers > 1) &&
|
||||
(cpi->ref_frame_flags & VP8_LAST_FRAME) &&
|
||||
(cpi->ref_frame_flags & VP8_GOLD_FRAME)) {
|
||||
if (cpi->closest_reference_frame == GOLDEN_FRAME) {
|
||||
sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 3;
|
||||
sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 3;
|
||||
sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 3;
|
||||
} else {
|
||||
sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 1;
|
||||
sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 1;
|
||||
sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
cpi->mode_check_freq[THR_ZERO1] =
|
||||
cpi->mode_check_freq[THR_NEAREST1] =
|
||||
cpi->mode_check_freq[THR_NEAR1] =
|
||||
@@ -1043,7 +1089,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
if (Speed >= 15)
|
||||
sf->half_pixel_search = 0;
|
||||
|
||||
vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
|
||||
memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
|
||||
|
||||
}; /* switch */
|
||||
|
||||
@@ -1083,12 +1129,10 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
if (cpi->sf.improved_quant)
|
||||
{
|
||||
cpi->mb.quantize_b = vp8_regular_quantize_b;
|
||||
cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
|
||||
}
|
||||
else
|
||||
{
|
||||
cpi->mb.quantize_b = vp8_fast_quantize_b;
|
||||
cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair;
|
||||
}
|
||||
if (cpi->sf.improved_quant != last_improved_quant)
|
||||
vp8cx_init_quantizer(cpi);
|
||||
@@ -1256,7 +1300,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
||||
CHECK_MEM_ERROR(cpi->active_map,
|
||||
vpx_calloc(cm->mb_rows * cm->mb_cols,
|
||||
sizeof(*cpi->active_map)));
|
||||
vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
|
||||
memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (width < 640)
|
||||
@@ -1363,20 +1407,31 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
cm->version = oxcf->Version;
|
||||
vp8_setup_version(cm);
|
||||
|
||||
/* frame rate is not available on the first frame, as it's derived from
|
||||
/* Frame rate is not available on the first frame, as it's derived from
|
||||
* the observed timestamps. The actual value used here doesn't matter
|
||||
* too much, as it will adapt quickly. If the reciprocal of the timebase
|
||||
* seems like a reasonable framerate, then use that as a guess, otherwise
|
||||
* use 30.
|
||||
* too much, as it will adapt quickly.
|
||||
*/
|
||||
cpi->framerate = (double)(oxcf->timebase.den) /
|
||||
(double)(oxcf->timebase.num);
|
||||
if (oxcf->timebase.num > 0) {
|
||||
cpi->framerate = (double)(oxcf->timebase.den) /
|
||||
(double)(oxcf->timebase.num);
|
||||
} else {
|
||||
cpi->framerate = 30;
|
||||
}
|
||||
|
||||
/* If the reciprocal of the timebase seems like a reasonable framerate,
|
||||
* then use that as a guess, otherwise use 30.
|
||||
*/
|
||||
if (cpi->framerate > 180)
|
||||
cpi->framerate = 30;
|
||||
|
||||
cpi->ref_framerate = cpi->framerate;
|
||||
|
||||
cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
|
||||
|
||||
cm->refresh_golden_frame = 0;
|
||||
cm->refresh_last_frame = 1;
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
/* change includes all joint functionality */
|
||||
vp8_change_config(cpi, oxcf);
|
||||
|
||||
@@ -1597,12 +1652,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
cpi->baseline_gf_interval =
|
||||
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
|
||||
cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
|
||||
|
||||
cm->refresh_golden_frame = 0;
|
||||
cm->refresh_last_frame = 1;
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
|
||||
cpi->oxcf.token_partitions = 3;
|
||||
#endif
|
||||
@@ -1705,13 +1754,25 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
if (cpi->oxcf.number_of_layers != prev_number_of_layers)
|
||||
{
|
||||
// If the number of temporal layers is changed we must start at the
|
||||
// base of the pattern cycle, so reset temporal_pattern_counter.
|
||||
// base of the pattern cycle, so set the layer id to 0 and reset
|
||||
// the temporal pattern counter.
|
||||
if (cpi->temporal_layer_id > 0) {
|
||||
cpi->temporal_layer_id = 0;
|
||||
}
|
||||
cpi->temporal_pattern_counter = 0;
|
||||
reset_temporal_layer_change(cpi, oxcf, prev_number_of_layers);
|
||||
}
|
||||
|
||||
if (!cpi->initial_width)
|
||||
{
|
||||
cpi->initial_width = cpi->oxcf.Width;
|
||||
cpi->initial_height = cpi->oxcf.Height;
|
||||
}
|
||||
|
||||
cm->Width = cpi->oxcf.Width;
|
||||
cm->Height = cpi->oxcf.Height;
|
||||
assert(cm->Width <= cpi->initial_width);
|
||||
assert(cm->Height <= cpi->initial_height);
|
||||
|
||||
/* TODO(jkoleszar): if an internal spatial resampling is active,
|
||||
* and we downsize the input image, maybe we should clear the
|
||||
@@ -1832,7 +1893,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
|
||||
cm = &cpi->common;
|
||||
|
||||
vpx_memset(cpi, 0, sizeof(VP8_COMP));
|
||||
memset(cpi, 0, sizeof(VP8_COMP));
|
||||
|
||||
if (setjmp(cm->error.jmp))
|
||||
{
|
||||
@@ -1852,6 +1913,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
|
||||
cpi->common.current_video_frame = 0;
|
||||
cpi->temporal_pattern_counter = 0;
|
||||
cpi->temporal_layer_id = -1;
|
||||
cpi->kf_overspend_bits = 0;
|
||||
cpi->kf_bitrate_adjustment = 0;
|
||||
cpi->frames_till_gf_update_due = 0;
|
||||
@@ -1904,6 +1966,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
}
|
||||
#endif
|
||||
|
||||
cpi->mse_source_denoised = 0;
|
||||
|
||||
/* Should we use the cyclic refresh method.
|
||||
* Currently this is tied to error resilient mode
|
||||
*/
|
||||
@@ -1927,7 +1991,9 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->cyclic_refresh_map = (signed char *) NULL;
|
||||
|
||||
CHECK_MEM_ERROR(cpi->consec_zero_last,
|
||||
vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
|
||||
vpx_calloc(cm->mb_rows * cm->mb_cols, 1));
|
||||
CHECK_MEM_ERROR(cpi->consec_zero_last_mvbias,
|
||||
vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
|
||||
|
||||
#ifdef VP8_ENTROPY_STATS
|
||||
init_context_counters();
|
||||
@@ -1946,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->source_alt_ref_active = 0;
|
||||
cpi->common.refresh_alt_ref_frame = 0;
|
||||
|
||||
cpi->force_maxqp = 0;
|
||||
|
||||
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
|
||||
#if CONFIG_INTERNAL_STATS
|
||||
cpi->b_calculate_ssimg = 0;
|
||||
@@ -2062,55 +2130,55 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
}
|
||||
#endif
|
||||
|
||||
cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16;
|
||||
cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
|
||||
cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16;
|
||||
cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16;
|
||||
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h;
|
||||
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v;
|
||||
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx3f = vp8_sad16x16x3;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx8f = vp8_sad16x16x8;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx4df = vp8_sad16x16x4d;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8;
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
|
||||
|
||||
cpi->fn_ptr[BLOCK_16X8].sdf = vp8_sad16x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
|
||||
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx3f = vp8_sad16x8x3;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx8f = vp8_sad16x8x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx4df = vp8_sad16x8x4d;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
|
||||
|
||||
cpi->fn_ptr[BLOCK_8X16].sdf = vp8_sad8x16;
|
||||
cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
|
||||
cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16;
|
||||
cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16;
|
||||
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx3f = vp8_sad8x16x3;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx8f = vp8_sad8x16x8;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx4df = vp8_sad8x16x4d;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
|
||||
|
||||
cpi->fn_ptr[BLOCK_8X8].sdf = vp8_sad8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx3f = vp8_sad8x8x3;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx8f = vp8_sad8x8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx4df = vp8_sad8x8x4d;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
|
||||
|
||||
cpi->fn_ptr[BLOCK_4X4].sdf = vp8_sad4x4;
|
||||
cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
|
||||
cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4;
|
||||
cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4;
|
||||
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
|
||||
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx3f = vp8_sad4x4x3;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx8f = vp8_sad4x4x8;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx4df = vp8_sad4x4x4d;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
|
||||
@@ -2206,9 +2274,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
||||
|
||||
if (cpi->b_calculate_psnr)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *lst_yv12 =
|
||||
&cpi->common.yv12_fb[cpi->common.lst_fb_idx];
|
||||
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
{
|
||||
int i;
|
||||
@@ -2220,7 +2285,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
||||
double dr = (double)cpi->bytes_in_layer[i] *
|
||||
8.0 / 1000.0 / time_encoded;
|
||||
double samples = 3.0 / 2 * cpi->frames_in_layer[i] *
|
||||
lst_yv12->y_width * lst_yv12->y_height;
|
||||
cpi->common.Width * cpi->common.Height;
|
||||
double total_psnr =
|
||||
vpx_sse_to_psnr(samples, 255.0,
|
||||
cpi->total_error2[i]);
|
||||
@@ -2242,7 +2307,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
||||
else
|
||||
{
|
||||
double samples = 3.0 / 2 * cpi->count *
|
||||
lst_yv12->y_width * lst_yv12->y_height;
|
||||
cpi->common.Width * cpi->common.Height;
|
||||
double total_psnr = vpx_sse_to_psnr(samples, 255.0,
|
||||
cpi->total_sq_error);
|
||||
double total_psnr2 = vpx_sse_to_psnr(samples, 255.0,
|
||||
@@ -2450,6 +2515,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
||||
vpx_free(cpi->tok);
|
||||
vpx_free(cpi->cyclic_refresh_map);
|
||||
vpx_free(cpi->consec_zero_last);
|
||||
vpx_free(cpi->consec_zero_last_mvbias);
|
||||
|
||||
vp8_remove_common(&cpi->common);
|
||||
vpx_free(cpi);
|
||||
@@ -2492,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
|
||||
{
|
||||
unsigned int sse;
|
||||
|
||||
vp8_mse16x16(orig + col, orig_stride,
|
||||
vpx_mse16x16(orig + col, orig_stride,
|
||||
recon + col, recon_stride,
|
||||
&sse);
|
||||
total_sse += sse;
|
||||
@@ -2805,7 +2871,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
/* Update data structure that monitors level of reference to last GF */
|
||||
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
/* this frame refreshes means next frames don't unless specified by user */
|
||||
@@ -2854,7 +2920,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
/* Update data structure that monitors level of reference to last GF */
|
||||
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
/* this frame refreshes means next frames don't unless specified by
|
||||
@@ -3293,6 +3359,49 @@ static void update_reference_frames(VP8_COMP *cpi)
|
||||
|
||||
}
|
||||
|
||||
static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
VP8_COMP *cpi)
|
||||
{
|
||||
int i, j;
|
||||
int Total = 0;
|
||||
int num_blocks = 0;
|
||||
int skip = 2;
|
||||
int min_consec_zero_last = 10;
|
||||
int tot_num_blocks = (source->y_height * source->y_width) >> 8;
|
||||
unsigned char *src = source->y_buffer;
|
||||
unsigned char *dst = dest->y_buffer;
|
||||
|
||||
/* Loop through the Y plane, every |skip| blocks along rows and colmumns,
|
||||
* summing the square differences, and only for blocks that have been
|
||||
* zero_last mode at least |x| frames in a row.
|
||||
*/
|
||||
for (i = 0; i < source->y_height; i += 16 * skip)
|
||||
{
|
||||
int block_index_row = (i >> 4) * cpi->common.mb_cols;
|
||||
for (j = 0; j < source->y_width; j += 16 * skip)
|
||||
{
|
||||
int index = block_index_row + (j >> 4);
|
||||
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
|
||||
unsigned int sse;
|
||||
Total += vpx_mse16x16(src + j,
|
||||
source->y_stride,
|
||||
dst + j, dest->y_stride,
|
||||
&sse);
|
||||
num_blocks++;
|
||||
}
|
||||
}
|
||||
src += 16 * skip * source->y_stride;
|
||||
dst += 16 * skip * dest->y_stride;
|
||||
}
|
||||
// Only return non-zero if we have at least ~1/16 samples for estimate.
|
||||
if (num_blocks > (tot_num_blocks >> 4)) {
|
||||
return (Total / num_blocks);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
const VP8_COMMON *const cm = &cpi->common;
|
||||
@@ -3305,12 +3414,12 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
// Only select blocks for computing nmse that have been encoded
|
||||
// as ZERO LAST min_consec_zero_last frames in a row.
|
||||
// Scale with number of temporal layers.
|
||||
int min_consec_zero_last = 8 / cpi->oxcf.number_of_layers;
|
||||
int min_consec_zero_last = 12 / cpi->oxcf.number_of_layers;
|
||||
// Decision is tested for changing the denoising mode every
|
||||
// num_mode_change times this function is called. Note that this
|
||||
// function called every 8 frames, so (8 * num_mode_change) is number
|
||||
// of frames where denoising mode change is tested for switch.
|
||||
int num_mode_change = 15;
|
||||
int num_mode_change = 20;
|
||||
// Framerate factor, to compensate for larger mse at lower framerates.
|
||||
// Use ref_framerate, which is full source framerate for temporal layers.
|
||||
// TODO(marpan): Adjust this factor.
|
||||
@@ -3322,7 +3431,12 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
static const unsigned char const_source[16] = {
|
||||
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128};
|
||||
|
||||
int bandwidth = (int)(cpi->target_bandwidth);
|
||||
// For temporal layers, use full bandwidth (top layer).
|
||||
if (cpi->oxcf.number_of_layers > 1) {
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[cpi->oxcf.number_of_layers - 1];
|
||||
bandwidth = (int)(lc->target_bandwidth);
|
||||
}
|
||||
// Loop through the Y plane, every skip blocks along rows and columns,
|
||||
// summing the normalized mean square error, only for blocks that have
|
||||
// been encoded as ZEROMV LAST for at least min_consec_zero_last frames in
|
||||
@@ -3334,12 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
int index = block_index_row + (j >> 4);
|
||||
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
|
||||
unsigned int sse;
|
||||
const unsigned int mse = vp8_mse16x16(src + j,
|
||||
ystride,
|
||||
dst + j,
|
||||
ystride,
|
||||
&sse);
|
||||
const unsigned int var = vp8_variance16x16(src + j,
|
||||
const unsigned int var = vpx_variance16x16(src + j,
|
||||
ystride,
|
||||
dst + j,
|
||||
ystride,
|
||||
@@ -3347,14 +3456,15 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
// Only consider this block as valid for noise measurement
|
||||
// if the sum_diff average of the current and previous frame
|
||||
// is small (to avoid effects from lighting change).
|
||||
if ((mse - var) < 256) {
|
||||
const unsigned int act = vp8_variance16x16(src + j,
|
||||
if ((sse - var) < 128) {
|
||||
unsigned int sse2;
|
||||
const unsigned int act = vpx_variance16x16(src + j,
|
||||
ystride,
|
||||
const_source,
|
||||
0,
|
||||
&sse);
|
||||
&sse2);
|
||||
if (act > 0)
|
||||
total += mse / act;
|
||||
total += sse / act;
|
||||
num_blocks++;
|
||||
}
|
||||
}
|
||||
@@ -3370,16 +3480,17 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
if (total > 0 &&
|
||||
(num_blocks > (tot_num_blocks >> 4))) {
|
||||
// Update the recursive mean square source_diff.
|
||||
total = (total << 8) / num_blocks;
|
||||
if (cpi->denoiser.nmse_source_diff_count == 0) {
|
||||
// First sample in new interval.
|
||||
cpi->denoiser.nmse_source_diff = total;
|
||||
cpi->denoiser.qp_avg = cm->base_qindex;
|
||||
} else {
|
||||
// For subsequent samples, use average with weight ~1/4 for new sample.
|
||||
cpi->denoiser.nmse_source_diff = (int)((total >> 2) +
|
||||
3 * (cpi->denoiser.nmse_source_diff >> 2));
|
||||
cpi->denoiser.qp_avg = (int)((cm->base_qindex >> 2) +
|
||||
3 * (cpi->denoiser.qp_avg >> 2));
|
||||
cpi->denoiser.nmse_source_diff = (int)((total +
|
||||
3 * cpi->denoiser.nmse_source_diff) >> 2);
|
||||
cpi->denoiser.qp_avg = (int)((cm->base_qindex +
|
||||
3 * cpi->denoiser.qp_avg) >> 2);
|
||||
}
|
||||
cpi->denoiser.nmse_source_diff_count++;
|
||||
}
|
||||
@@ -3391,7 +3502,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
(cpi->denoiser.nmse_source_diff >
|
||||
cpi->denoiser.threshold_aggressive_mode) &&
|
||||
(cpi->denoiser.qp_avg < cpi->denoiser.qp_threshold_up &&
|
||||
cpi->target_bandwidth > cpi->denoiser.bitrate_threshold)) {
|
||||
bandwidth > cpi->denoiser.bitrate_threshold)) {
|
||||
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive);
|
||||
} else {
|
||||
// Check for going down: from aggressive to normal mode.
|
||||
@@ -3400,7 +3511,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
|
||||
cpi->denoiser.threshold_aggressive_mode)) ||
|
||||
((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
|
||||
(cpi->denoiser.qp_avg > cpi->denoiser.qp_threshold_down ||
|
||||
cpi->target_bandwidth < cpi->denoiser.bitrate_threshold))) {
|
||||
bandwidth < cpi->denoiser.bitrate_threshold))) {
|
||||
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV);
|
||||
}
|
||||
}
|
||||
@@ -3416,6 +3527,13 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
{
|
||||
const FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
int update_any_ref_buffers = 1;
|
||||
if (cpi->common.refresh_last_frame == 0 &&
|
||||
cpi->common.refresh_golden_frame == 0 &&
|
||||
cpi->common.refresh_alt_ref_frame == 0) {
|
||||
update_any_ref_buffers = 0;
|
||||
}
|
||||
|
||||
if (cm->no_lpf)
|
||||
{
|
||||
cm->filter_level = 0;
|
||||
@@ -3427,11 +3545,36 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
vp8_clear_system_state();
|
||||
|
||||
vpx_usec_timer_start(&timer);
|
||||
if (cpi->sf.auto_filter == 0)
|
||||
if (cpi->sf.auto_filter == 0) {
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) {
|
||||
// Use the denoised buffer for selecting base loop filter level.
|
||||
// Denoised signal for current frame is stored in INTRA_FRAME.
|
||||
// No denoising on key frames.
|
||||
vp8cx_pick_filter_level_fast(
|
||||
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi);
|
||||
} else {
|
||||
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
|
||||
}
|
||||
#else
|
||||
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
|
||||
|
||||
else
|
||||
#endif
|
||||
} else {
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) {
|
||||
// Use the denoised buffer for selecting base loop filter level.
|
||||
// Denoised signal for current frame is stored in INTRA_FRAME.
|
||||
// No denoising on key frames.
|
||||
vp8cx_pick_filter_level(
|
||||
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi);
|
||||
} else {
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
}
|
||||
#else
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
@@ -3447,7 +3590,9 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
|
||||
#endif
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
// No need to apply loop-filter if the encoded frame does not update
|
||||
// any reference buffers.
|
||||
if (cm->filter_level > 0 && update_any_ref_buffers)
|
||||
{
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type);
|
||||
}
|
||||
@@ -3577,39 +3722,78 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* In multi-resolution encoding, frame_type is decided by lowest-resolution
|
||||
* encoder. Same frame_type is adopted while encoding at other resolution.
|
||||
*/
|
||||
if (cpi->oxcf.mr_encoder_id)
|
||||
{
|
||||
LOWER_RES_FRAME_INFO* low_res_frame_info
|
||||
= (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
|
||||
if (cpi->oxcf.mr_total_resolutions > 1) {
|
||||
LOWER_RES_FRAME_INFO* low_res_frame_info
|
||||
= (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
|
||||
|
||||
if (cpi->oxcf.mr_encoder_id) {
|
||||
|
||||
// TODO(marpan): This constraint shouldn't be needed, as we would like
|
||||
// to allow for key frame setting (forced or periodic) defined per
|
||||
// spatial layer. For now, keep this in.
|
||||
cm->frame_type = low_res_frame_info->frame_type;
|
||||
|
||||
// Check if lower resolution is available for motion vector reuse.
|
||||
if(cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
cpi->mr_low_res_mv_avail = 1;
|
||||
cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped);
|
||||
cpi->mr_low_res_mv_avail = 1;
|
||||
cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped);
|
||||
|
||||
if (cpi->ref_frame_flags & VP8_LAST_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[LAST_FRAME]);
|
||||
if (cpi->ref_frame_flags & VP8_LAST_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[LAST_FRAME]);
|
||||
|
||||
if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]);
|
||||
if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]);
|
||||
|
||||
if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
|
||||
// Don't use altref to determine whether low res is available.
|
||||
// TODO (marpan): Should we make this type of condition on a
|
||||
// per-reference frame basis?
|
||||
/*
|
||||
if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
|
||||
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME]
|
||||
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
// On a key frame: For the lowest resolution, keep track of the key frame
|
||||
// counter value. For the higher resolutions, reset the current video
|
||||
// frame counter to that of the lowest resolution.
|
||||
// This is done to handle the case where we may stop/start encoding
|
||||
// higher layer(s). The restart-encoding of higher layer is only signaled
|
||||
// by a key frame for now.
|
||||
// TODO (marpan): Add flag to indicate restart-encoding of higher layer.
|
||||
if (cm->frame_type == KEY_FRAME) {
|
||||
if (cpi->oxcf.mr_encoder_id) {
|
||||
// If the initial starting value of the buffer level is zero (this can
|
||||
// happen because we may have not started encoding this higher stream),
|
||||
// then reset it to non-zero value based on |starting_buffer_level|.
|
||||
if (cpi->common.current_video_frame == 0 && cpi->buffer_level == 0) {
|
||||
unsigned int i;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
for (i = 0; i < cpi->oxcf.number_of_layers; i++) {
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[i];
|
||||
lc->bits_off_target = lc->starting_buffer_level;
|
||||
lc->buffer_level = lc->starting_buffer_level;
|
||||
}
|
||||
}
|
||||
cpi->common.current_video_frame =
|
||||
low_res_frame_info->key_frame_counter_value;
|
||||
} else {
|
||||
low_res_frame_info->key_frame_counter_value =
|
||||
cpi->common.current_video_frame;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find the reference frame closest to the current frame.
|
||||
cpi->closest_reference_frame = LAST_FRAME;
|
||||
if (cm->frame_type != KEY_FRAME) {
|
||||
if(cm->frame_type != KEY_FRAME) {
|
||||
int i;
|
||||
MV_REFERENCE_FRAME closest_ref = INTRA_FRAME;
|
||||
if (cpi->ref_frame_flags & VP8_LAST_FRAME) {
|
||||
@@ -3619,12 +3803,12 @@ static void encode_frame_to_data_rate
|
||||
} else if (cpi->ref_frame_flags & VP8_ALTR_FRAME) {
|
||||
closest_ref = ALTREF_FRAME;
|
||||
}
|
||||
for (i = 1; i <= 3; i++) {
|
||||
for(i = 1; i <= 3; i++) {
|
||||
vpx_ref_frame_type_t ref_frame_type = (vpx_ref_frame_type_t)
|
||||
((i == 3) ? 4 : i);
|
||||
if (cpi->ref_frame_flags & ref_frame_type) {
|
||||
if ((cm->current_video_frame - cpi->current_ref_frames[i]) <
|
||||
(cm->current_video_frame - cpi->current_ref_frames[closest_ref])) {
|
||||
(cm->current_video_frame - cpi->current_ref_frames[closest_ref])) {
|
||||
closest_ref = i;
|
||||
}
|
||||
}
|
||||
@@ -3650,7 +3834,9 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
|
||||
// Reset the zero_last counter to 0 on key frame.
|
||||
vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
|
||||
memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
|
||||
memset(cpi->consec_zero_last_mvbias, 0,
|
||||
(cpi->common.mb_rows * cpi->common.mb_cols));
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -4001,7 +4187,10 @@ static void encode_frame_to_data_rate
|
||||
*/
|
||||
if (cpi->cyclic_refresh_mode_enabled)
|
||||
{
|
||||
if (cpi->current_layer==0)
|
||||
// Special case for screen_content_mode with golden frame updates.
|
||||
int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 &&
|
||||
cm->refresh_golden_frame);
|
||||
if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf)
|
||||
cyclic_background_refresh(cpi, Q, 0);
|
||||
else
|
||||
disable_segmentation(cpi);
|
||||
@@ -4179,8 +4368,10 @@ static void encode_frame_to_data_rate
|
||||
else
|
||||
disable_segmentation(cpi);
|
||||
}
|
||||
// Reset the consec_zero_last counter on key frame.
|
||||
vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
|
||||
// Reset the zero_last counter to 0 on key frame.
|
||||
memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
|
||||
memset(cpi->consec_zero_last_mvbias, 0,
|
||||
(cpi->common.mb_rows * cpi->common.mb_cols));
|
||||
vp8_set_quantizer(cpi, Q);
|
||||
}
|
||||
|
||||
@@ -4203,7 +4394,7 @@ static void encode_frame_to_data_rate
|
||||
if (cm->refresh_entropy_probs == 0)
|
||||
{
|
||||
/* save a copy for later refresh */
|
||||
vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
|
||||
memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
|
||||
}
|
||||
|
||||
vp8_update_coef_context(cpi);
|
||||
@@ -4221,6 +4412,11 @@ static void encode_frame_to_data_rate
|
||||
/* transform / motion compensation build reconstruction frame */
|
||||
vp8_encode_frame(cpi);
|
||||
|
||||
if (cpi->oxcf.screen_content_mode == 2) {
|
||||
if (vp8_drop_encodedframe_overshoot(cpi, Q))
|
||||
return;
|
||||
}
|
||||
|
||||
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
|
||||
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
|
||||
#endif
|
||||
@@ -4613,6 +4809,22 @@ static void encode_frame_to_data_rate
|
||||
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
// Get some measure of the amount of noise, by measuring the (partial) mse
|
||||
// between source and denoised buffer, for y channel. Partial refers to
|
||||
// computing the sse for a sub-sample of the frame (i.e., skip x blocks along row/column),
|
||||
// and only for blocks in that set that are consecutive ZEROMV_LAST mode.
|
||||
// Do this every ~8 frames, to further reduce complexity.
|
||||
// TODO(marpan): Keep this for now for the case cpi->oxcf.noise_sensitivity < 4,
|
||||
// should be removed in favor of the process_denoiser_mode_change() function below.
|
||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||
cpi->oxcf.noise_sensitivity < 4 &&
|
||||
!cpi->oxcf.screen_content_mode &&
|
||||
cpi->frames_since_key%8 == 0 &&
|
||||
cm->frame_type != KEY_FRAME) {
|
||||
cpi->mse_source_denoised = measure_square_diff_partial(
|
||||
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi->Source, cpi);
|
||||
}
|
||||
|
||||
// For the adaptive denoising mode (noise_sensitivity == 4), sample the mse
|
||||
// of source diff (between current and previous frame), and determine if we
|
||||
// should switch the denoiser mode. Sampling refers to computing the mse for
|
||||
@@ -4621,6 +4833,7 @@ static void encode_frame_to_data_rate
|
||||
// constraint on the sum diff between blocks. This process is called every
|
||||
// ~8 frames, to further reduce complexity.
|
||||
if (cpi->oxcf.noise_sensitivity == 4 &&
|
||||
!cpi->oxcf.screen_content_mode &&
|
||||
cpi->frames_since_key % 8 == 0 &&
|
||||
cm->frame_type != KEY_FRAME) {
|
||||
process_denoiser_mode_change(cpi);
|
||||
@@ -4758,6 +4971,13 @@ static void encode_frame_to_data_rate
|
||||
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
|
||||
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
|
||||
|
||||
// If the frame dropper is not enabled, don't let the buffer level go below
|
||||
// some threshold, given here by -|maximum_buffer_size|. For now we only do
|
||||
// this for screen content input.
|
||||
if (cpi->drop_frames_allowed == 0 && cpi->oxcf.screen_content_mode &&
|
||||
cpi->bits_off_target < -cpi->oxcf.maximum_buffer_size)
|
||||
cpi->bits_off_target = -cpi->oxcf.maximum_buffer_size;
|
||||
|
||||
/* Rolling monitors of whether we are over or underspending used to
|
||||
* help regulate min and Max Q in two pass.
|
||||
*/
|
||||
@@ -5232,7 +5452,26 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
|
||||
cpi->ref_framerate = 10000000.0 / avg_duration;
|
||||
}
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
if (cpi->oxcf.mr_total_resolutions > 1) {
|
||||
LOWER_RES_FRAME_INFO* low_res_frame_info = (LOWER_RES_FRAME_INFO*)
|
||||
cpi->oxcf.mr_low_res_mode_info;
|
||||
// Frame rate should be the same for all spatial layers in
|
||||
// multi-res-encoding (simulcast), so we constrain the frame rate for
|
||||
// higher layers to be that of lowest resolution. This is needed
|
||||
// as the application may decide to skip encoding a high layer and
|
||||
// then start again, in which case a big jump in time-stamps will
|
||||
// be received for that high layer, which will yield an incorrect
|
||||
// frame rate (from time-stamp adjustment in above calculation).
|
||||
if (cpi->oxcf.mr_encoder_id) {
|
||||
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
|
||||
}
|
||||
else {
|
||||
// Keep track of frame rate for lowest resolution.
|
||||
low_res_frame_info->low_res_framerate = cpi->ref_framerate;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
{
|
||||
unsigned int i;
|
||||
@@ -5262,8 +5501,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
update_layer_contexts (cpi);
|
||||
|
||||
/* Restore layer specific context & set frame rate */
|
||||
layer = cpi->oxcf.layer_id[
|
||||
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
|
||||
if (cpi->temporal_layer_id >= 0) {
|
||||
layer = cpi->temporal_layer_id;
|
||||
} else {
|
||||
layer = cpi->oxcf.layer_id[
|
||||
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
|
||||
}
|
||||
restore_layer_context (cpi, layer);
|
||||
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
|
||||
}
|
||||
@@ -5382,19 +5625,19 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
|
||||
if (cm->refresh_entropy_probs == 0)
|
||||
{
|
||||
vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
|
||||
memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
|
||||
}
|
||||
|
||||
/* Save the contexts separately for alt ref, gold and last. */
|
||||
/* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */
|
||||
if(cm->refresh_alt_ref_frame)
|
||||
vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
|
||||
memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
|
||||
|
||||
if(cm->refresh_golden_frame)
|
||||
vpx_memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
|
||||
memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
|
||||
|
||||
if(cm->refresh_last_frame)
|
||||
vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
|
||||
memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
|
||||
|
||||
/* if its a dropped frame honor the requests on subsequent frames */
|
||||
if (*size > 0)
|
||||
@@ -5439,19 +5682,23 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
double frame_psnr;
|
||||
YV12_BUFFER_CONFIG *orig = cpi->Source;
|
||||
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
|
||||
int y_samples = orig->y_height * orig->y_width ;
|
||||
int uv_samples = orig->uv_height * orig->uv_width ;
|
||||
unsigned int y_width = cpi->common.Width;
|
||||
unsigned int y_height = cpi->common.Height;
|
||||
unsigned int uv_width = (y_width + 1) / 2;
|
||||
unsigned int uv_height = (y_height + 1) / 2;
|
||||
int y_samples = y_height * y_width;
|
||||
int uv_samples = uv_height * uv_width;
|
||||
int t_samples = y_samples + 2 * uv_samples;
|
||||
double sq_error;
|
||||
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
|
||||
recon->y_buffer, recon->y_stride, y_width, y_height);
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
|
||||
recon->u_buffer, recon->uv_stride, uv_width, uv_height);
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
|
||||
recon->v_buffer, recon->uv_stride, uv_width, uv_height);
|
||||
|
||||
sq_error = (double)(ye + ue + ve);
|
||||
|
||||
@@ -5473,13 +5720,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
vp8_clear_system_state();
|
||||
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height);
|
||||
pp->y_buffer, pp->y_stride, y_width, y_height);
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
|
||||
pp->u_buffer, pp->uv_stride, uv_width, uv_height);
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
|
||||
pp->v_buffer, pp->uv_stride, uv_width, uv_height);
|
||||
|
||||
sq_error2 = (double)(ye + ue + ve);
|
||||
|
||||
@@ -5606,6 +5853,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
|
||||
cpi->common.show_frame_mi = cpi->common.mi;
|
||||
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
|
||||
#else
|
||||
(void)flags;
|
||||
|
||||
if (cpi->common.frame_to_show)
|
||||
{
|
||||
@@ -5698,7 +5946,7 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns
|
||||
{
|
||||
if (map)
|
||||
{
|
||||
vpx_memcpy(cpi->active_map, map, rows * cols);
|
||||
memcpy(cpi->active_map, map, rows * cols);
|
||||
cpi->active_map_enabled = 1;
|
||||
}
|
||||
else
|
||||
@@ -5745,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
|
||||
for (j = 0; j < source->y_width; j += 16)
|
||||
{
|
||||
unsigned int sse;
|
||||
Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
|
||||
Total += vpx_mse16x16(src + j, source->y_stride,
|
||||
dst + j, dest->y_stride, &sse);
|
||||
}
|
||||
|
||||
src += 16 * source->y_stride;
|
||||
|
||||
@@ -513,10 +513,20 @@ typedef struct VP8_COMP
|
||||
signed char *cyclic_refresh_map;
|
||||
// Count of how many (consecutive) times a macroblock uses ZEROMV_LAST.
|
||||
unsigned char *consec_zero_last;
|
||||
// Counter that is reset when a block is checked for a mode-bias against
|
||||
// ZEROMV_LASTREF.
|
||||
unsigned char *consec_zero_last_mvbias;
|
||||
|
||||
// Frame counter for the temporal pattern. Counter is reset when the temporal
|
||||
// layers are changed dynamically (run-time change).
|
||||
unsigned int temporal_pattern_counter;
|
||||
// Temporal layer id.
|
||||
int temporal_layer_id;
|
||||
|
||||
// Measure of average squared difference between source and denoised signal.
|
||||
int mse_source_denoised;
|
||||
|
||||
int force_maxqp;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* multithread data */
|
||||
@@ -657,6 +667,9 @@ typedef struct VP8_COMP
|
||||
|
||||
int droppable;
|
||||
|
||||
int initial_width;
|
||||
int initial_height;
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
VP8_DENOISER denoiser;
|
||||
#endif
|
||||
@@ -687,6 +700,7 @@ typedef struct VP8_COMP
|
||||
#endif
|
||||
/* The frame number of each reference frames */
|
||||
unsigned int current_ref_frames[MAX_REF_FRAMES];
|
||||
// Closest reference frame to current frame.
|
||||
MV_REFERENCE_FRAME closest_reference_frame;
|
||||
|
||||
struct rd_costs_struct
|
||||
@@ -702,6 +716,11 @@ typedef struct VP8_COMP
|
||||
} rd_costs;
|
||||
} VP8_COMP;
|
||||
|
||||
void vp8_alloc_compressor_data(VP8_COMP *cpi);
|
||||
int vp8_reverse_trans(int x);
|
||||
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
|
||||
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
|
||||
|
||||
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
|
||||
unsigned char *dest_end, unsigned long *size);
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <limits.h>
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "onyx_int.h"
|
||||
#include "modecosts.h"
|
||||
#include "encodeintra.h"
|
||||
@@ -29,8 +30,6 @@
|
||||
#include "denoising.h"
|
||||
#endif
|
||||
|
||||
extern int VP8_UVSSE(MACROBLOCK *x);
|
||||
|
||||
#ifdef SPEEDSTATS
|
||||
extern unsigned int cnt_pm;
|
||||
#endif
|
||||
@@ -38,7 +37,133 @@ extern unsigned int cnt_pm;
|
||||
extern const int vp8_ref_frame_order[MAX_MODES];
|
||||
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
|
||||
|
||||
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
|
||||
// Fixed-point implementation of a skin color classifier. Skin color
// is modeled by a Gaussian distribution in the CbCr color space.
// See ../../test/skin_color_detector_test.cc where the reference
// skin color classifier is defined.

// Parameters of the fixed-point skin color model.
static const int skin_mean[2] = {7463, 9614};  // q6
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157};  // q16
static const int skin_threshold = 1570636;  // q18

// Returns the Mahalanobis distance measure of the input CbCr values
// with respect to the skin color model above.
static int evaluate_skin_color_difference(int cb, int cr)
{
    // Offsets of the inputs (scaled to q6) from the model mean.
    const int cb_off = (cb << 6) - skin_mean[0];
    const int cr_off = (cr << 6) - skin_mean[1];
    // Rounding term for the 10-bit down-shift of the q12 products to q2.
    const int half = 1 << 9;
    const int cb_sq_q2 = (cb_off * cb_off + half) >> 10;
    const int cross_q2 = (cb_off * cr_off + half) >> 10;
    const int cr_sq_q2 = (cr_off * cr_off + half) >> 10;
    int dist;

    // Quadratic form with the inverse covariance; the cross term appears
    // once for each off-diagonal entry.
    dist = skin_inv_cov[0] * cb_sq_q2;
    dist += skin_inv_cov[1] * cross_q2;
    dist += skin_inv_cov[2] * cross_q2;
    dist += skin_inv_cov[3] * cr_sq_q2;
    return dist;
}
|
||||
|
||||
// Returns the largest absolute difference between the sample at
// signal[offsetx * stride + offsety] and its three neighbors in the 2x2
// group reached by stepping |sgnx| along the stride direction and |sgny|
// within the row.
static int macroblock_corner_grad(unsigned char* signal, int stride,
                                  int offsetx, int offsety, int sgnx, int sgny)
{
    const int here = offsetx * stride + offsety;
    const int other_row = (offsetx + sgnx) * stride + offsety;
    const int center = signal[here];
    int grad;
    int diff;

    // Neighbor in the same row.
    grad = abs(center - signal[here + sgny]);

    // Neighbor in the adjacent row, same column.
    diff = abs(center - signal[other_row]);
    if (diff > grad)
        grad = diff;

    // Diagonal neighbor.
    diff = abs(center - signal[other_row + sgny]);
    if (diff > grad)
        grad = diff;

    return grad;
}
|
||||
|
||||
static int check_dot_artifact_candidate(VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
unsigned char *target_last,
|
||||
int stride,
|
||||
unsigned char* last_ref,
|
||||
int mb_row,
|
||||
int mb_col,
|
||||
int channel)
|
||||
{
|
||||
int threshold1 = 6;
|
||||
int threshold2 = 3;
|
||||
unsigned int max_num = (cpi->common.MBs) / 10;
|
||||
int grad_last = 0;
|
||||
int grad_source = 0;
|
||||
int index = mb_row * cpi->common.mb_cols + mb_col;
|
||||
// Threshold for #consecutive (base layer) frames using zero_last mode.
|
||||
int num_frames = 30;
|
||||
int shift = 15;
|
||||
if (channel > 0) {
|
||||
shift = 7;
|
||||
}
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
{
|
||||
num_frames = 20;
|
||||
}
|
||||
x->zero_last_dot_suppress = 0;
|
||||
// Blocks on base layer frames that have been using ZEROMV_LAST repeatedly
|
||||
// (i.e., for at least |x| consecutive frames) are candidates for increasing the
|
||||
// rd adjustment for zero_last mode.
|
||||
// Only allow this for at most |max_num| blocks per frame.
|
||||
// Don't allow this for screen content input.
|
||||
if (cpi->current_layer == 0 &&
|
||||
cpi->consec_zero_last_mvbias[index] > num_frames &&
|
||||
x->mbs_zero_last_dot_suppress < max_num &&
|
||||
!cpi->oxcf.screen_content_mode)
|
||||
{
|
||||
// If this block is checked here, label it so we don't check it again until
|
||||
// ~|x| framaes later.
|
||||
x->zero_last_dot_suppress = 1;
|
||||
// Dot artifact is noticeable as strong gradient at corners of macroblock,
|
||||
// for flat areas. As a simple detector for now, we look for a high
|
||||
// corner gradient on last ref, and a smaller gradient on source.
|
||||
// Check 4 corners, return if any satisfy condition.
|
||||
// Top-left:
|
||||
grad_last = macroblock_corner_grad(last_ref, stride, 0, 0, 1, 1);
|
||||
grad_source = macroblock_corner_grad(target_last, stride, 0, 0, 1, 1);
|
||||
if (grad_last >= threshold1 && grad_source <= threshold2)
|
||||
{
|
||||
x->mbs_zero_last_dot_suppress++;
|
||||
return 1;
|
||||
}
|
||||
// Top-right:
|
||||
grad_last = macroblock_corner_grad(last_ref, stride, 0, shift, 1, -1);
|
||||
grad_source = macroblock_corner_grad(target_last, stride, 0, shift, 1, -1);
|
||||
if (grad_last >= threshold1 && grad_source <= threshold2)
|
||||
{
|
||||
x->mbs_zero_last_dot_suppress++;
|
||||
return 1;
|
||||
}
|
||||
// Bottom-left:
|
||||
grad_last = macroblock_corner_grad(last_ref, stride, shift, 0, -1, 1);
|
||||
grad_source = macroblock_corner_grad(target_last, stride, shift, 0, -1, 1);
|
||||
if (grad_last >= threshold1 && grad_source <= threshold2)
|
||||
{
|
||||
x->mbs_zero_last_dot_suppress++;
|
||||
return 1;
|
||||
}
|
||||
// Bottom-right:
|
||||
grad_last = macroblock_corner_grad(last_ref, stride, shift, shift, -1, -1);
|
||||
grad_source = macroblock_corner_grad(target_last, stride, shift, shift, -1, -1);
|
||||
if (grad_last >= threshold1 && grad_source <= threshold2)
|
||||
{
|
||||
x->mbs_zero_last_dot_suppress++;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Checks if the input yCbCr values corresponds to skin color.
|
||||
static int is_skin_color(int y, int cb, int cr)
|
||||
{
|
||||
if (y < 40 || y > 220)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
|
||||
}
|
||||
|
||||
int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
|
||||
int_mv *bestmv, int_mv *ref_mv,
|
||||
@@ -52,6 +177,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
|
||||
(void) ref_mv;
|
||||
(void) error_per_bit;
|
||||
(void) vfp;
|
||||
(void) mb;
|
||||
(void) mvcost;
|
||||
(void) distortion;
|
||||
(void) sse;
|
||||
@@ -90,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Sum of squared differences between a 4x4 block at |src_ptr| (row pitch
 * |source_stride|) and a 4x4 block at |ref_ptr| (row pitch |recon_stride|). */
unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr,
                                int source_stride,
                                const unsigned char *ref_ptr,
                                int recon_stride)
{
    int sse = 0;
    int row;

    for (row = 0; row < 4; ++row)
    {
        const unsigned char *src_row = src_ptr + row * source_stride;
        const unsigned char *ref_row = ref_ptr + row * recon_stride;
        int col;

        for (col = 0; col < 4; ++col)
        {
            const int delta = src_row[col] - ref_row[col];
            sse += delta * delta;
        }
    }

    return sse;
}
|
||||
|
||||
static int get_prediction_error(BLOCK *be, BLOCKD *b)
|
||||
{
|
||||
unsigned char *sptr;
|
||||
@@ -124,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
|
||||
sptr = (*(be->base_src) + be->src);
|
||||
dptr = b->predictor;
|
||||
|
||||
return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
|
||||
return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
|
||||
|
||||
}
|
||||
|
||||
@@ -514,10 +613,16 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2,
|
||||
#endif
|
||||
|
||||
// Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame.
|
||||
if (this_mode == ZEROMV &&
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
|
||||
(denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME))
|
||||
// TODO: We should also add condition on distance of closest to current.
|
||||
if(!cpi->oxcf.screen_content_mode &&
|
||||
this_mode == ZEROMV &&
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
|
||||
(denoise_aggressive || (cpi->closest_reference_frame == LAST_FRAME)))
|
||||
{
|
||||
// No adjustment if block is considered to be skin area.
|
||||
if(x->is_skin)
|
||||
rd_adj = 100;
|
||||
|
||||
this_rd = ((int64_t)this_rd) * rd_adj / 100;
|
||||
}
|
||||
|
||||
@@ -598,6 +703,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
#endif
|
||||
|
||||
int sf_improved_mv_pred = cpi->sf.improved_mv_pred;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
int dissim = INT_MAX;
|
||||
int parent_ref_frame = 0;
|
||||
int_mv parent_ref_mv;
|
||||
MB_PREDICTION_MODE parent_mode = 0;
|
||||
int parent_ref_valid = 0;
|
||||
#endif
|
||||
|
||||
int_mv mvp;
|
||||
|
||||
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
@@ -608,14 +722,56 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
unsigned char *plane[4][3];
|
||||
int ref_frame_map[4];
|
||||
int sign_bias = 0;
|
||||
int dot_artifact_candidate = 0;
|
||||
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
|
||||
|
||||
// If the current frame is using LAST as a reference, check for
|
||||
// biasing the mode selection for dot artifacts.
|
||||
if (cpi->ref_frame_flags & VP8_LAST_FRAME) {
|
||||
unsigned char* target_y = x->src.y_buffer;
|
||||
unsigned char* target_u = x->block[16].src + *x->block[16].base_src;
|
||||
unsigned char* target_v = x->block[20].src + *x->block[20].base_src;
|
||||
int stride = x->src.y_stride;
|
||||
int stride_uv = x->block[16].src_stride;
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity) {
|
||||
const int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 1 : 0;
|
||||
target_y =
|
||||
cpi->denoiser.yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset;
|
||||
stride = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_stride;
|
||||
if (uv_denoise) {
|
||||
target_u =
|
||||
cpi->denoiser.yv12_running_avg[LAST_FRAME].u_buffer +
|
||||
recon_uvoffset;
|
||||
target_v =
|
||||
cpi->denoiser.yv12_running_avg[LAST_FRAME].v_buffer +
|
||||
recon_uvoffset;
|
||||
stride_uv = cpi->denoiser.yv12_running_avg[LAST_FRAME].uv_stride;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dot_artifact_candidate =
|
||||
check_dot_artifact_candidate(cpi, x, target_y, stride,
|
||||
plane[LAST_FRAME][0], mb_row, mb_col, 0);
|
||||
// If not found in Y channel, check UV channel.
|
||||
if (!dot_artifact_candidate) {
|
||||
dot_artifact_candidate =
|
||||
check_dot_artifact_candidate(cpi, x, target_u, stride_uv,
|
||||
plane[LAST_FRAME][1], mb_row, mb_col, 1);
|
||||
if (!dot_artifact_candidate) {
|
||||
dot_artifact_candidate =
|
||||
check_dot_artifact_candidate(cpi, x, target_v, stride_uv,
|
||||
plane[LAST_FRAME][2], mb_row, mb_col, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
int dissim = INT_MAX;
|
||||
int parent_ref_frame = 0;
|
||||
int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail;
|
||||
int_mv parent_ref_mv;
|
||||
MB_PREDICTION_MODE parent_mode = 0;
|
||||
|
||||
// |parent_ref_valid| will be set here if potentially we can do mv reuse for
|
||||
// this higher resol (|cpi->oxcf.mr_encoder_id| > 0) frame.
|
||||
// |parent_ref_valid| may be reset depending on |parent_ref_frame| for
|
||||
// the current macroblock below.
|
||||
parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail;
|
||||
if (parent_ref_valid)
|
||||
{
|
||||
int parent_ref_flag;
|
||||
@@ -633,24 +789,51 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
* In this event, take the conservative approach of disabling the
|
||||
* lower res info for this MB.
|
||||
*/
|
||||
|
||||
parent_ref_flag = 0;
|
||||
// Note availability for mv reuse is only based on last and golden.
|
||||
if (parent_ref_frame == LAST_FRAME)
|
||||
parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME);
|
||||
else if (parent_ref_frame == GOLDEN_FRAME)
|
||||
parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME);
|
||||
else if (parent_ref_frame == ALTREF_FRAME)
|
||||
parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME);
|
||||
|
||||
//assert(!parent_ref_frame || parent_ref_flag);
|
||||
|
||||
// If |parent_ref_frame| did not match either last or golden then
|
||||
// shut off mv reuse.
|
||||
if (parent_ref_frame && !parent_ref_flag)
|
||||
parent_ref_valid = 0;
|
||||
|
||||
// Don't do mv reuse since we want to allow for another mode besides
|
||||
// ZEROMV_LAST to remove dot artifact.
|
||||
if (dot_artifact_candidate)
|
||||
parent_ref_valid = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check if current macroblock is in skin area.
|
||||
{
|
||||
const int y = x->src.y_buffer[7 * x->src.y_stride + 7];
|
||||
const int cb = x->src.u_buffer[3 * x->src.uv_stride + 3];
|
||||
const int cr = x->src.v_buffer[3 * x->src.uv_stride + 3];
|
||||
x->is_skin = 0;
|
||||
if (!cpi->oxcf.screen_content_mode)
|
||||
x->is_skin = is_skin_color(y, cb, cr);
|
||||
}
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity) {
|
||||
// Under aggressive denoising mode, should we use skin map to reduce denoiser
|
||||
// and ZEROMV bias? Will need to revisit the accuracy of this detection for
|
||||
// very noisy input. For now keep this as is (i.e., don't turn it off).
|
||||
// if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive)
|
||||
// x->is_skin = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
mode_mv = mode_mv_sb[sign_bias];
|
||||
best_ref_mv.as_int = 0;
|
||||
vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
|
||||
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
|
||||
memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
|
||||
memset(&best_mbmode, 0, sizeof(best_mbmode));
|
||||
|
||||
/* Setup search priorities */
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
@@ -681,8 +864,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
|
||||
}
|
||||
|
||||
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
|
||||
|
||||
/* Count of the number of MBs tested so far this frame */
|
||||
x->mbs_tested_so_far++;
|
||||
|
||||
@@ -692,9 +873,13 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
|
||||
/* If the frame has big static background and current MB is in low
|
||||
* motion area, its mode decision is biased to ZEROMV mode.
|
||||
*/
|
||||
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
|
||||
* motion area, its mode decision is biased to ZEROMV mode.
|
||||
* No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
|
||||
* At such speed settings, ZEROMV is already heavily favored.
|
||||
*/
|
||||
if (cpi->Speed < 12) {
|
||||
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
|
||||
}
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity) {
|
||||
@@ -703,6 +888,13 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dot_artifact_candidate)
|
||||
{
|
||||
// Bias against ZEROMV_LAST mode.
|
||||
rd_adjustment = 150;
|
||||
}
|
||||
|
||||
|
||||
/* if we encode a new mv this is important
|
||||
* find the best new motion vector
|
||||
*/
|
||||
@@ -819,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
else
|
||||
{
|
||||
rate2 += rate;
|
||||
distortion2 = vp8_variance16x16(
|
||||
distortion2 = vpx_variance16x16(
|
||||
*(b->base_src), b->src_stride,
|
||||
x->e_mbd.predictor, 16, &sse);
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
@@ -848,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
xd->dst.y_stride,
|
||||
xd->predictor,
|
||||
16);
|
||||
distortion2 = vp8_variance16x16
|
||||
distortion2 = vpx_variance16x16
|
||||
(*(b->base_src), b->src_stride,
|
||||
x->e_mbd.predictor, 16, &sse);
|
||||
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
@@ -888,14 +1080,17 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
step_param = cpi->sf.first_step + speed_adjust;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* If lower-res drops this frame, then higher-res encoder does
|
||||
motion search without any previous knowledge. Also, since
|
||||
last frame motion info is not stored, then we can not
|
||||
/* If lower-res frame is not available for mv reuse (because of
|
||||
frame dropping or different temporal layer pattern), then higher
|
||||
resol encoder does motion search without any previous knowledge.
|
||||
Also, since last frame motion info is not stored, then we can not
|
||||
use improved_mv_pred. */
|
||||
if (cpi->oxcf.mr_encoder_id && !parent_ref_valid)
|
||||
if (cpi->oxcf.mr_encoder_id)
|
||||
sf_improved_mv_pred = 0;
|
||||
|
||||
if (parent_ref_valid && parent_ref_frame)
|
||||
// Only use parent MV as predictor if this candidate reference frame
|
||||
// (|this_ref_frame|) is equal to |parent_ref_frame|.
|
||||
if (parent_ref_valid && (parent_ref_frame == this_ref_frame))
|
||||
{
|
||||
/* Use parent MV as predictor. Adjust search range
|
||||
* accordingly.
|
||||
@@ -939,7 +1134,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
}
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
if (parent_ref_valid && parent_ref_frame && dissim <= 2 &&
|
||||
if (parent_ref_valid && (parent_ref_frame == this_ref_frame) &&
|
||||
dissim <= 2 &&
|
||||
MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
|
||||
abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4)
|
||||
{
|
||||
@@ -976,10 +1172,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
* change the behavior in lowest-resolution encoder.
|
||||
* Will improve it later.
|
||||
*/
|
||||
/* Set step_param to 0 to ensure large-range motion search
|
||||
when encoder drops this frame at lower-resolution.
|
||||
*/
|
||||
if (!parent_ref_valid)
|
||||
/* Set step_param to 0 to ensure large-range motion search
|
||||
* when mv reuse is not valid (i.e. |parent_ref_valid| = 0),
|
||||
* or if this candidate reference frame (|this_ref_frame|) is
|
||||
* not equal to |parent_ref_frame|.
|
||||
*/
|
||||
if (!parent_ref_valid || (parent_ref_frame != this_ref_frame))
|
||||
step_param = 0;
|
||||
#endif
|
||||
bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv,
|
||||
@@ -1081,18 +1279,24 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
|
||||
/* Store for later use by denoiser. */
|
||||
if (this_mode == ZEROMV && sse < zero_mv_sse )
|
||||
// Don't denoise with GOLDEN or ALTREF if they are old reference
|
||||
// frames (greater than MAX_GF_ARF_DENOISE_RANGE frames in past).
|
||||
int skip_old_reference = ((this_ref_frame != LAST_FRAME) &&
|
||||
(cpi->common.current_video_frame -
|
||||
cpi->current_ref_frames[this_ref_frame] >
|
||||
MAX_GF_ARF_DENOISE_RANGE)) ? 1 : 0;
|
||||
if (this_mode == ZEROMV && sse < zero_mv_sse &&
|
||||
!skip_old_reference)
|
||||
{
|
||||
zero_mv_sse = sse;
|
||||
x->best_zeromv_reference_frame =
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame;
|
||||
}
|
||||
|
||||
/* Store the best NEWMV in x for later use in the denoiser. */
|
||||
// Store the best NEWMV in x for later use in the denoiser.
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
|
||||
sse < best_sse)
|
||||
sse < best_sse && !skip_old_reference)
|
||||
{
|
||||
best_sse = sse;
|
||||
x->best_sse_inter_mode = NEWMV;
|
||||
@@ -1114,8 +1318,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
*returndistortion = distortion2;
|
||||
best_rd_sse = sse;
|
||||
best_rd = this_rd;
|
||||
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
|
||||
/* Testing this mode gave rise to an improvement in best error
|
||||
* score. Lower threshold a bit for next time
|
||||
@@ -1178,6 +1382,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
int block_index = mb_row * cpi->common.mb_cols + mb_col;
|
||||
int reevaluate = 0;
|
||||
int is_noisy = 0;
|
||||
if (x->best_sse_inter_mode == DC_PRED)
|
||||
{
|
||||
/* No best MV found. */
|
||||
@@ -1187,18 +1393,52 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
x->best_reference_frame = best_mbmode.ref_frame;
|
||||
best_sse = best_rd_sse;
|
||||
}
|
||||
// For non-skin blocks that have selected ZEROMV for this current frame,
|
||||
// and have been selecting ZEROMV_LAST (on the base layer frame) at
|
||||
// least |x~20| consecutive past frames in a row, label the block for
|
||||
// possible increase in denoising strength. We also condition this
|
||||
// labeling on there being significant denoising in the scene
|
||||
if (cpi->oxcf.noise_sensitivity == 4) {
|
||||
if (cpi->denoiser.nmse_source_diff >
|
||||
70 * cpi->denoiser.threshold_aggressive_mode / 100)
|
||||
is_noisy = 1;
|
||||
} else {
|
||||
if (cpi->mse_source_denoised > 1000)
|
||||
is_noisy = 1;
|
||||
}
|
||||
x->increase_denoising = 0;
|
||||
if (!x->is_skin &&
|
||||
x->best_sse_inter_mode == ZEROMV &&
|
||||
(x->best_reference_frame == LAST_FRAME ||
|
||||
x->best_reference_frame == cpi->closest_reference_frame) &&
|
||||
cpi->consec_zero_last[block_index] >= 20 &&
|
||||
is_noisy) {
|
||||
x->increase_denoising = 1;
|
||||
}
|
||||
x->denoise_zeromv = 0;
|
||||
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
|
||||
recon_yoffset, recon_uvoffset,
|
||||
&cpi->common.lf_info, mb_row, mb_col,
|
||||
block_index);
|
||||
|
||||
/* Reevaluate ZEROMV after denoising. */
|
||||
if (best_mbmode.ref_frame == INTRA_FRAME &&
|
||||
// Reevaluate ZEROMV after denoising: for large noise content
|
||||
// (i.e., cpi->mse_source_denoised is above threshold), do this for all
|
||||
// blocks that did not pick ZEROMV as best mode but are using ZEROMV
|
||||
// for denoising. Otherwise, always re-evaluate for blocks that picked
|
||||
// INTRA mode as best mode.
|
||||
// Avoid blocks that have been biased against ZERO_LAST
|
||||
// (i.e., dot artifact candidate blocks).
|
||||
reevaluate = (best_mbmode.ref_frame == INTRA_FRAME) ||
|
||||
(best_mbmode.mode != ZEROMV &&
|
||||
x->denoise_zeromv &&
|
||||
cpi->mse_source_denoised > 2000);
|
||||
if (!dot_artifact_candidate &&
|
||||
reevaluate &&
|
||||
x->best_zeromv_reference_frame != INTRA_FRAME)
|
||||
{
|
||||
int this_rd = 0;
|
||||
int this_ref_frame = x->best_zeromv_reference_frame;
|
||||
rd_adjustment = 100;
|
||||
rate2 = x->ref_frame_cost[this_ref_frame] +
|
||||
vp8_cost_mv_ref(ZEROMV, mdcounts);
|
||||
distortion2 = 0;
|
||||
@@ -1217,8 +1457,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
|
||||
if (this_rd < best_rd)
|
||||
{
|
||||
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1242,8 +1482,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
/* set to the best mb mode, this copy can be skip if x->skip since it
|
||||
* already has the right content */
|
||||
if (!x->skip)
|
||||
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
|
||||
sizeof(MB_MODE_INFO));
|
||||
memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
|
||||
sizeof(MB_MODE_INFO));
|
||||
|
||||
if (best_mbmode.mode <= B_PRED)
|
||||
{
|
||||
@@ -1258,7 +1498,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
update_mvcount(x, &best_ref_mv);
|
||||
}
|
||||
|
||||
|
||||
void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
|
||||
{
|
||||
int error4x4, error16x16 = INT_MAX;
|
||||
@@ -1282,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
|
||||
xd->dst.y_stride,
|
||||
xd->predictor,
|
||||
16);
|
||||
distortion = vp8_variance16x16
|
||||
distortion = vpx_variance16x16
|
||||
(*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
|
||||
rate = x->mbmode_cost[xd->frame_type][mode];
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "onyx_int.h"
|
||||
@@ -49,7 +50,7 @@ static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
|
||||
src_y = src_ybc->y_buffer + yoffset;
|
||||
dst_y = dst_ybc->y_buffer + yoffset;
|
||||
|
||||
vpx_memcpy(dst_y, src_y, ystride * linestocopy);
|
||||
memcpy(dst_y, src_y, ystride * linestocopy);
|
||||
}
|
||||
|
||||
static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
|
||||
@@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
|
||||
for (j = 0; j < source->y_width; j += 16)
|
||||
{
|
||||
unsigned int sse;
|
||||
Total += vp8_mse16x16(src + j, source->y_stride,
|
||||
Total += vpx_mse16x16(src + j, source->y_stride,
|
||||
dst + j, dest->y_stride,
|
||||
&sse);
|
||||
}
|
||||
@@ -142,7 +143,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
|
||||
int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
|
||||
int filt_val;
|
||||
int best_filt_val = cm->filter_level;
|
||||
int best_filt_val;
|
||||
YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;
|
||||
|
||||
/* Replace unfiltered frame buffer with a new one */
|
||||
@@ -274,8 +275,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
|
||||
int filter_step;
|
||||
int filt_high = 0;
|
||||
/* Start search at previous frame filter level */
|
||||
int filt_mid = cm->filter_level;
|
||||
int filt_mid;
|
||||
int filt_low = 0;
|
||||
int filt_best;
|
||||
int filt_direction = 0;
|
||||
@@ -287,7 +287,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
|
||||
YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;
|
||||
|
||||
vpx_memset(ss_err, 0, sizeof(ss_err));
|
||||
memset(ss_err, 0, sizeof(ss_err));
|
||||
|
||||
/* Replace unfiltered frame buffer with a new one */
|
||||
cm->frame_to_show = &cpi->pick_lf_lvl_frame;
|
||||
|
||||
@@ -65,8 +65,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
short *dequant_ptr = d->dequant;
|
||||
short zbin_oq_value = b->zbin_extra;
|
||||
|
||||
vpx_memset(qcoeff_ptr, 0, 32);
|
||||
vpx_memset(dqcoeff_ptr, 0, 32);
|
||||
memset(qcoeff_ptr, 0, 32);
|
||||
memset(dqcoeff_ptr, 0, 32);
|
||||
|
||||
eob = -1;
|
||||
|
||||
@@ -101,7 +101,7 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
*d->eob = (char)(eob + 1);
|
||||
}
|
||||
|
||||
void vp8_quantize_mby_c(MACROBLOCK *x)
|
||||
void vp8_quantize_mby(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
||||
@@ -114,7 +114,7 @@ void vp8_quantize_mby_c(MACROBLOCK *x)
|
||||
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
||||
}
|
||||
|
||||
void vp8_quantize_mb_c(MACROBLOCK *x)
|
||||
void vp8_quantize_mb(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
||||
@@ -125,7 +125,7 @@ void vp8_quantize_mb_c(MACROBLOCK *x)
|
||||
}
|
||||
|
||||
|
||||
void vp8_quantize_mbuv_c(MACROBLOCK *x)
|
||||
void vp8_quantize_mbuv(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -133,23 +133,6 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x)
|
||||
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
|
||||
}
|
||||
|
||||
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
|
||||
* these two C functions if corresponding optimized routine is not available.
|
||||
* NEON optimized version implements currently the fast quantization for pair
|
||||
* of blocks. */
|
||||
/* Pair wrapper: runs vp8_regular_quantize_b() on (b1, d1) and then on
 * (b2, d2), in that order. */
void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
    /* First block of the pair. */
    vp8_regular_quantize_b(b1, d1);

    /* Second block of the pair. */
    vp8_regular_quantize_b(b2, d2);
}
|
||||
|
||||
/* Pair wrapper: runs vp8_fast_quantize_b_c() on (b1, d1) and then on
 * (b2, d2), in that order. */
void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
    /* First block of the pair. */
    vp8_fast_quantize_b_c(b1, d1);

    /* Second block of the pair. */
    vp8_fast_quantize_b_c(b2, d2);
}
|
||||
|
||||
|
||||
static const int qrounding_factors[129] =
|
||||
{
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
@@ -552,6 +535,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
|
||||
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
|
||||
int update = 0;
|
||||
int new_delta_q;
|
||||
int new_uv_delta_q;
|
||||
cm->base_qindex = Q;
|
||||
|
||||
/* if any of the delta_q values are changing update flag has to be set */
|
||||
@@ -559,8 +543,6 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
|
||||
|
||||
cm->y1dc_delta_q = 0;
|
||||
cm->y2ac_delta_q = 0;
|
||||
cm->uvdc_delta_q = 0;
|
||||
cm->uvac_delta_q = 0;
|
||||
|
||||
if (Q < 4)
|
||||
{
|
||||
@@ -572,6 +554,21 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
|
||||
update |= cm->y2dc_delta_q != new_delta_q;
|
||||
cm->y2dc_delta_q = new_delta_q;
|
||||
|
||||
new_uv_delta_q = 0;
|
||||
// For screen content, lower the q value for UV channel. For now, select
|
||||
// conservative delta; same delta for dc and ac, and decrease it with lower
|
||||
// Q, and set to 0 below some threshold. May want to condition this in
|
||||
// future on the variance/energy in UV channel.
|
||||
if (cpi->oxcf.screen_content_mode && Q > 40) {
|
||||
new_uv_delta_q = -(int)(0.15 * Q);
|
||||
// Check range: magnitude of delta is 4 bits.
|
||||
if (new_uv_delta_q < -15) {
|
||||
new_uv_delta_q = -15;
|
||||
}
|
||||
}
|
||||
update |= cm->uvdc_delta_q != new_uv_delta_q;
|
||||
cm->uvdc_delta_q = new_uv_delta_q;
|
||||
cm->uvac_delta_q = new_uv_delta_q;
|
||||
|
||||
/* Set Segment specific quantizers */
|
||||
mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
|
||||
|
||||
@@ -18,6 +18,9 @@ extern "C" {
|
||||
|
||||
struct VP8_COMP;
|
||||
struct macroblock;
|
||||
extern void vp8_quantize_mb(struct macroblock *x);
|
||||
extern void vp8_quantize_mby(struct macroblock *x);
|
||||
extern void vp8_quantize_mbuv(struct macroblock *x);
|
||||
extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q);
|
||||
extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi);
|
||||
extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
|
||||
|
||||
@@ -296,7 +296,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
|
||||
|
||||
vp8_default_coef_probs(& cpi->common);
|
||||
|
||||
vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
|
||||
{
|
||||
int flag[2] = {1, 1};
|
||||
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
|
||||
@@ -305,9 +305,9 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
|
||||
/* Make sure we initialize separate contexts for altref,gold, and normal.
|
||||
* TODO shouldn't need 3 different copies of structure to do this!
|
||||
*/
|
||||
vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
|
||||
|
||||
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
|
||||
|
||||
@@ -708,7 +708,13 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
|
||||
Adjustment = (cpi->this_frame_target - min_frame_target);
|
||||
|
||||
if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
|
||||
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
|
||||
{
|
||||
Adjustment = (cpi->current_gf_interval - 1) * Adjustment;
|
||||
// Limit adjustment to 10% of current target.
|
||||
if (Adjustment > (10 * cpi->this_frame_target) / 100)
|
||||
Adjustment = (10 * cpi->this_frame_target) / 100;
|
||||
cpi->this_frame_target += Adjustment;
|
||||
}
|
||||
else
|
||||
cpi->this_frame_target -= Adjustment;
|
||||
}
|
||||
@@ -1209,6 +1215,11 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
|
||||
{
|
||||
int Q = cpi->active_worst_quality;
|
||||
|
||||
if (cpi->force_maxqp == 1) {
|
||||
cpi->active_worst_quality = cpi->worst_quality;
|
||||
return cpi->worst_quality;
|
||||
}
|
||||
|
||||
/* Reset Zbin OQ value */
|
||||
cpi->mb.zbin_over_quant = 0;
|
||||
|
||||
@@ -1553,3 +1564,46 @@ int vp8_pick_frame_size(VP8_COMP *cpi)
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
// If this just encoded frame (mcomp/transform/quant, but before loopfilter and
|
||||
// pack_bitstream) has large overshoot, and was not being encoded close to the
|
||||
// max QP, then drop this frame and force next frame to be encoded at max QP.
|
||||
// Condition this on 1 pass CBR with screen content mode and frame dropper off.
|
||||
// TODO(marpan): Should do this exit condition during the encode_frame
|
||||
// (i.e., halfway during the encoding of the frame) to save cycles.
|
||||
int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) {
|
||||
if (cpi->pass == 0 &&
|
||||
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
|
||||
cpi->drop_frames_allowed == 0 &&
|
||||
cpi->common.frame_type != KEY_FRAME) {
|
||||
// Note: the "projected_frame_size" from encode_frame() only gives estimate
|
||||
// of mode/motion vector rate (in non-rd mode): so below we only require
|
||||
// that projected_frame_size is somewhat greater than per-frame-bandwidth,
|
||||
// but add additional condition with high threshold on prediction residual.
|
||||
|
||||
// QP threshold: only allow dropping if we are not close to qp_max.
|
||||
int thresh_qp = 3 * cpi->worst_quality >> 2;
|
||||
// Rate threshold, in bytes.
|
||||
int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3);
|
||||
// Threshold for the average (over all macroblocks) of the pixel-sum
|
||||
// residual error over 16x16 block. Should add QP dependence on threshold?
|
||||
int thresh_pred_err_mb = (256 << 4);
|
||||
int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs);
|
||||
if (Q < thresh_qp &&
|
||||
cpi->projected_frame_size > thresh_rate &&
|
||||
pred_err_mb > thresh_pred_err_mb) {
|
||||
// Drop this frame: advance frame counters, and set force_maxqp flag.
|
||||
cpi->common.current_video_frame++;
|
||||
cpi->frames_since_key++;
|
||||
// Flag to indicate we will force next frame to be encoded at max QP.
|
||||
cpi->force_maxqp = 1;
|
||||
return 1;
|
||||
} else {
|
||||
cpi->force_maxqp = 0;
|
||||
return 0;
|
||||
}
|
||||
cpi->force_maxqp = 0;
|
||||
return 0;
|
||||
}
|
||||
cpi->force_maxqp = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_
|
||||
/* return of 0 means drop frame */
|
||||
extern int vp8_pick_frame_size(VP8_COMP *cpi);
|
||||
|
||||
extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <assert.h>
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "tokenize.h"
|
||||
#include "treewriter.h"
|
||||
#include "onyx_int.h"
|
||||
@@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x)
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_variance8x8(uptr, pre_stride,
|
||||
vpx_variance8x8(uptr, pre_stride,
|
||||
upred_ptr, uv_stride, &sse2);
|
||||
vp8_variance8x8(vptr, pre_stride,
|
||||
vpx_variance8x8(vptr, pre_stride,
|
||||
vpred_ptr, uv_stride, &sse1);
|
||||
sse2 += sse1;
|
||||
}
|
||||
@@ -555,8 +556,8 @@ static int vp8_rdcost_mby(MACROBLOCK *mb)
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -650,8 +651,8 @@ static int rd_pick_intra4x4block(
|
||||
* a temp buffer that meets the stride requirements, but we are only
|
||||
* interested in the left 4x4 block
|
||||
* */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4);
|
||||
DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
|
||||
DECLARE_ALIGNED(16, unsigned char, best_predictor[16*4]);
|
||||
DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
|
||||
int dst_stride = x->e_mbd.dst.y_stride;
|
||||
unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
|
||||
|
||||
@@ -691,7 +692,7 @@ static int rd_pick_intra4x4block(
|
||||
*a = tempa;
|
||||
*l = templ;
|
||||
copy_predictor(best_predictor, b->predictor);
|
||||
vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
|
||||
memcpy(best_dqcoeff, b->dqcoeff, 32);
|
||||
}
|
||||
}
|
||||
b->bmi.as_mode = *best_mode;
|
||||
@@ -715,8 +716,8 @@ static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
|
||||
ENTROPY_CONTEXT *tl;
|
||||
const int *bmode_costs;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -820,8 +821,8 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -837,6 +838,9 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
|
||||
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
|
||||
int *distortion, int fullpixel)
|
||||
{
|
||||
(void)cpi;
|
||||
(void)fullpixel;
|
||||
|
||||
vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
|
||||
vp8_subtract_mbuv(x->src_diff,
|
||||
x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
|
||||
@@ -854,6 +858,9 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
|
||||
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
|
||||
int *distortion, int fullpixel)
|
||||
{
|
||||
(void)cpi;
|
||||
(void)fullpixel;
|
||||
|
||||
vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
|
||||
vp8_subtract_mbuv(x->src_diff,
|
||||
x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
|
||||
@@ -1122,8 +1129,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
ENTROPY_CONTEXT *ta_b;
|
||||
ENTROPY_CONTEXT *tl_b;
|
||||
|
||||
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
@@ -1166,8 +1173,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
ENTROPY_CONTEXT *ta_s;
|
||||
ENTROPY_CONTEXT *tl_s;
|
||||
|
||||
vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta_s = (ENTROPY_CONTEXT *)&t_above_s;
|
||||
tl_s = (ENTROPY_CONTEXT *)&t_left_s;
|
||||
@@ -1323,14 +1330,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
mode_selected = this_mode;
|
||||
best_label_rd = this_rd;
|
||||
|
||||
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
}
|
||||
} /*for each 4x4 mode*/
|
||||
|
||||
vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
|
||||
bsi->ref_mv, x->mvcost);
|
||||
@@ -1386,7 +1393,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
int i;
|
||||
BEST_SEG_INFO bsi;
|
||||
|
||||
vpx_memset(&bsi, 0, sizeof(bsi));
|
||||
memset(&bsi, 0, sizeof(bsi));
|
||||
|
||||
bsi.segment_rd = best_rd;
|
||||
bsi.ref_mv = best_ref_mv;
|
||||
@@ -1655,7 +1662,6 @@ void vp8_mv_pred
|
||||
mv.as_mv.row = mvx[vcnt/2];
|
||||
mv.as_mv.col = mvy[vcnt/2];
|
||||
|
||||
find = 1;
|
||||
/* sr is set to 0 to allow calling function to decide the search
|
||||
* range.
|
||||
*/
|
||||
@@ -1685,16 +1691,16 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
|
||||
}else if(xd->mb_to_top_edge==0)
|
||||
{ /* only has left MB for sad calculation. */
|
||||
near_sad[0] = near_sad[2] = INT_MAX;
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
|
||||
}else if(xd->mb_to_left_edge ==0)
|
||||
{ /* only has above MB for sad calculation. */
|
||||
near_sad[1] = near_sad[2] = INT_MAX;
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
|
||||
}else
|
||||
{
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
|
||||
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
|
||||
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride);
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
@@ -1709,14 +1715,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
|
||||
if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
|
||||
|
||||
if(near_sad[4] != INT_MAX)
|
||||
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
|
||||
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride);
|
||||
if(near_sad[5] != INT_MAX)
|
||||
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
|
||||
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);
|
||||
if(near_sad[6] != INT_MAX)
|
||||
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
|
||||
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
|
||||
if(near_sad[7] != INT_MAX)
|
||||
near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
|
||||
near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride);
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
@@ -1778,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
|
||||
if(threshold < x->encode_breakout)
|
||||
threshold = x->encode_breakout;
|
||||
|
||||
var = vp8_variance16x16
|
||||
var = vpx_variance16x16
|
||||
(*(b->base_src), b->src_stride,
|
||||
x->e_mbd.predictor, 16, &sse);
|
||||
|
||||
@@ -1920,8 +1926,8 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd,
|
||||
(rd->distortion2-rd->distortion_uv));
|
||||
|
||||
best_mode->rd = this_rd;
|
||||
vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
|
||||
vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
|
||||
memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
|
||||
memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
|
||||
|
||||
if ((this_mode == B_PRED) || (this_mode == SPLITMV))
|
||||
{
|
||||
@@ -1983,9 +1989,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
best_mode.rd = INT_MAX;
|
||||
best_mode.yrd = INT_MAX;
|
||||
best_mode.intra_rd = INT_MAX;
|
||||
vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
|
||||
vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
|
||||
vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
|
||||
memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
|
||||
memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
|
||||
memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
|
||||
|
||||
/* Setup search priorities */
|
||||
get_reference_search_order(cpi, ref_frame_map);
|
||||
@@ -2287,7 +2293,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
|
||||
|
||||
/* Further step/diamond searches as necessary */
|
||||
n = 0;
|
||||
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
|
||||
|
||||
n = num00;
|
||||
@@ -2554,8 +2559,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
intra_rd_penalty, cpi, x);
|
||||
if (this_rd < best_mode.rd || x->skip)
|
||||
{
|
||||
/* Note index of best mode so far */
|
||||
best_mode_index = mode_index;
|
||||
*returnrate = rd.rate2;
|
||||
*returndistortion = rd.distortion2;
|
||||
update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
|
||||
@@ -2580,7 +2583,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
|
||||
|
||||
/* macroblock modes */
|
||||
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
|
||||
memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
|
||||
|
||||
if (best_mode.mbmode.mode == B_PRED)
|
||||
{
|
||||
@@ -2593,7 +2596,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
for (i = 0; i < 16; i++)
|
||||
xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
|
||||
|
||||
vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
|
||||
memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mv.as_int =
|
||||
x->partition_info->bmi[15].mv.as_int;
|
||||
|
||||
@@ -136,6 +136,9 @@ extern void vp8_mv_pred
|
||||
int near_sadidx[]
|
||||
);
|
||||
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
|
||||
int VP8_UVSSE(MACROBLOCK *x);
|
||||
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
|
||||
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -23,7 +23,7 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
|
||||
if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
|
||||
{
|
||||
/* Reset Gf useage monitors */
|
||||
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
}
|
||||
else
|
||||
|
||||
@@ -163,6 +163,8 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
int pre = d->offset;
|
||||
int pre_stride = x->e_mbd.pre.y_stride;
|
||||
|
||||
(void)error_thresh;
|
||||
|
||||
best_ref_mv1.as_int = 0;
|
||||
best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3;
|
||||
best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3;
|
||||
@@ -236,12 +238,12 @@ static void vp8_temporal_filter_iterate_c
|
||||
int mb_rows = cpi->common.mb_rows;
|
||||
int mb_y_offset = 0;
|
||||
int mb_uv_offset = 0;
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
|
||||
DECLARE_ALIGNED(16, unsigned int, accumulator[16*16 + 8*8 + 8*8]);
|
||||
DECLARE_ALIGNED(16, unsigned short, count[16*16 + 8*8 + 8*8]);
|
||||
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
|
||||
YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
|
||||
unsigned char *dst1, *dst2;
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[16*16 + 8*8 + 8*8]);
|
||||
|
||||
/* Save input state */
|
||||
unsigned char *y_buffer = mbd->pre.y_buffer;
|
||||
@@ -272,8 +274,8 @@ static void vp8_temporal_filter_iterate_c
|
||||
int i, j, k;
|
||||
int stride;
|
||||
|
||||
vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
|
||||
vpx_memset(count, 0, 384*sizeof(unsigned short));
|
||||
memset(accumulator, 0, 384*sizeof(unsigned int));
|
||||
memset(count, 0, 384*sizeof(unsigned short));
|
||||
|
||||
#if ALT_REF_MC_ENABLED
|
||||
cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
|
||||
@@ -500,7 +502,7 @@ void vp8_temporal_filter_prepare_c
|
||||
start_frame = distance + frames_to_blur_forward;
|
||||
|
||||
/* Setup frame pointers, NULL indicates frame not included in filter */
|
||||
vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
|
||||
memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
|
||||
for (frame = 0; frame < frames_to_blur; frame++)
|
||||
{
|
||||
int which_buffer = start_frame - frame;
|
||||
|
||||
@@ -421,7 +421,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
|
||||
|
||||
void init_context_counters(void)
|
||||
{
|
||||
vpx_memset(context_counters, 0, sizeof(context_counters));
|
||||
memset(context_counters, 0, sizeof(context_counters));
|
||||
}
|
||||
|
||||
void print_context_counters()
|
||||
@@ -596,13 +596,13 @@ void vp8_fix_contexts(MACROBLOCKD *x)
|
||||
/* Clear entropy contexts for Y2 blocks */
|
||||
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
}
|
||||
else
|
||||
{
|
||||
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/asm_offsets.h"
|
||||
#include "vpx_config.h"
|
||||
#include "block.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "onyx_int.h"
|
||||
#include "treewriter.h"
|
||||
#include "tokenize.h"
|
||||
|
||||
BEGIN
|
||||
|
||||
/* regular quantize */
|
||||
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
|
||||
DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
|
||||
DEFINE(vp8_block_round, offsetof(BLOCK, round));
|
||||
DEFINE(vp8_block_quant, offsetof(BLOCK, quant));
|
||||
DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
|
||||
DEFINE(vp8_block_zbin_extra, offsetof(BLOCK, zbin_extra));
|
||||
DEFINE(vp8_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
|
||||
DEFINE(vp8_block_quant_shift, offsetof(BLOCK, quant_shift));
|
||||
|
||||
DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
|
||||
DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
|
||||
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
|
||||
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
|
||||
|
||||
/* subtract */
|
||||
DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
|
||||
DEFINE(vp8_block_src, offsetof(BLOCK, src));
|
||||
DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
|
||||
DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
|
||||
|
||||
DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
|
||||
|
||||
/* pack tokens */
|
||||
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
|
||||
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
|
||||
DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
|
||||
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
|
||||
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
|
||||
DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
|
||||
DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
|
||||
|
||||
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
|
||||
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
|
||||
DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
|
||||
DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
|
||||
DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
|
||||
|
||||
DEFINE(vp8_token_value, offsetof(vp8_token, value));
|
||||
DEFINE(vp8_token_len, offsetof(vp8_token, Len));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
|
||||
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
|
||||
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
|
||||
DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
|
||||
DEFINE(vp8_writer_sz , sizeof(vp8_writer));
|
||||
|
||||
DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
|
||||
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
|
||||
DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
|
||||
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
END
|
||||
|
||||
/* add asserts for any offset that is not supported by assembly code
|
||||
* add asserts for any size that is not supported by assembly code
|
||||
|
||||
* These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
|
||||
* change they will have to be adjusted.
|
||||
*/
|
||||
|
||||
#if HAVE_EDSP
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
|
||||
#endif
|
||||
@@ -121,12 +121,12 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
|
||||
if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
|
||||
if (abs_sum_diff > sum_diff_thresh) {
|
||||
// Before returning to copy the block (i.e., apply no denoising),
|
||||
// checK if we can still apply some (weaker) temporal filtering to
|
||||
// check if we can still apply some (weaker) temporal filtering to
|
||||
// this block, that would otherwise not be denoised at all. Simplest
|
||||
// is to apply an additional adjustment to running_avg_y to bring it
|
||||
// closer to sig. The adjustment is capped by a maximum delta, and
|
||||
// chosen such that in most cases the resulting sum_diff will be
|
||||
// within the accceptable range given by sum_diff_thresh.
|
||||
// within the acceptable range given by sum_diff_thresh.
|
||||
|
||||
// The delta is set by the excess of absolute pixel diff over the
|
||||
// threshold.
|
||||
@@ -302,12 +302,12 @@ int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
|
||||
if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
|
||||
if (abs_sum_diff > sum_diff_thresh) {
|
||||
// Before returning to copy the block (i.e., apply no denoising),
|
||||
// checK if we can still apply some (weaker) temporal filtering to
|
||||
// check if we can still apply some (weaker) temporal filtering to
|
||||
// this block, that would otherwise not be denoised at all. Simplest
|
||||
// is to apply an additional adjustment to running_avg_y to bring it
|
||||
// closer to sig. The adjustment is capped by a maximum delta, and
|
||||
// chosen such that in most cases the resulting sum_diff will be
|
||||
// within the accceptable range given by sum_diff_thresh.
|
||||
// within the acceptable range given by sum_diff_thresh.
|
||||
|
||||
// The delta is set by the excess of absolute pixel diff over the
|
||||
// threshold.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user