Issue #546 - Update Tycho to libvpx 1.4 - Part 1: Update the lib

This commit is contained in:
NTD
2016-10-04 09:24:39 -04:00
committed by roytam1
parent dce1998623
commit 90bf8e82b0
428 changed files with 48441 additions and 31912 deletions
+1 -1
View File
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitutes direct or contributory patent infringement, or inducement of
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.
+2 -2
View File
@@ -6,6 +6,6 @@ Mozilla build system.
The libvpx git repository is:
https://gerrit.chromium.org/gerrit/webm/libvpx
https://chromium.googlesource.com/webm/libvpx
The git commit ID used was 587ff646f
The git commit ID used was e67d45d4ce92468ba193288b59093fef0a502662
-29
View File
@@ -1,29 +0,0 @@
diff --git a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
--- a/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -27,21 +27,24 @@ DECLARE_ALIGNED(32, static const uint8_t
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
};
DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
};
#if defined(__clang__)
# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \
- (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0)
+ (defined(__APPLE__) && \
+ (__clang_major__ == 4 && __clang_minor__ >= 0 && \
+ __clang_minor__ <= 2) || \
+ (__clang_major__ == 5 && __clang_minor__ == 0))
# define MM256_BROADCASTSI128_SI256(x) \
_mm_broadcastsi128_si256((__m128i const *)&(x))
# else // clang > 3.3, and not 5.0 on macosx.
# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
# endif // clang <= 3.3
#elif defined(__GNUC__)
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6)
# define MM256_BROADCASTSI128_SI256(x) \
_mm_broadcastsi128_si256((__m128i const *)&(x))
# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7
-857
View File
@@ -1,857 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
typedef enum {
OUTPUT_FMT_PLAIN,
OUTPUT_FMT_RVDS,
OUTPUT_FMT_GAS,
OUTPUT_FMT_C_HEADER,
} output_fmt_t;
int log_msg(const char *fmt, ...) {
int res;
va_list ap;
va_start(ap, fmt);
res = vfprintf(stderr, fmt, ap);
va_end(ap);
return res;
}
#if defined(__GNUC__) && __GNUC__
#if defined(FORCE_PARSE_ELF)
#if defined(__MACH__)
#undef __MACH__
#endif
#if !defined(__ELF__)
#define __ELF__
#endif
#endif
#if defined(__MACH__)
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
switch (mode) {
case OUTPUT_FMT_RVDS:
printf("%-40s EQU %5d\n", name, val);
return 0;
case OUTPUT_FMT_GAS:
printf(".set %-40s, %5d\n", name, val);
return 0;
case OUTPUT_FMT_C_HEADER:
printf("#define %-40s %5d\n", name, val);
return 0;
default:
log_msg("Unsupported mode: %d", mode);
return 1;
}
}
int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
int i, j;
struct mach_header header;
uint8_t *buf = base_buf;
int base_data_section = 0;
int bits = 0;
/* We can read in mach_header for 32 and 64 bit architectures
* because it's identical to mach_header_64 except for the last
* element (uint32_t reserved), which we don't use. Then, when
* we know which architecture we're looking at, increment buf
* appropriately.
*/
memcpy(&header, buf, sizeof(struct mach_header));
if (header.magic == MH_MAGIC) {
if (header.cputype == CPU_TYPE_ARM
|| header.cputype == CPU_TYPE_X86) {
bits = 32;
buf += sizeof(struct mach_header);
} else {
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
goto bail;
}
} else if (header.magic == MH_MAGIC_64) {
if (header.cputype == CPU_TYPE_X86_64) {
bits = 64;
buf += sizeof(struct mach_header_64);
} else {
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
goto bail;
}
} else {
log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
MH_MAGIC, MH_MAGIC_64, header.magic);
goto bail;
}
if (header.filetype != MH_OBJECT) {
log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
goto bail;
}
for (i = 0; i < header.ncmds; i++) {
struct load_command lc;
memcpy(&lc, buf, sizeof(struct load_command));
if (lc.cmd == LC_SEGMENT) {
uint8_t *seg_buf = buf;
struct section s;
struct segment_command seg_c;
memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
seg_buf += sizeof(struct segment_command);
/* Although each section is given it's own offset, nlist.n_value
* references the offset of the first section. This isn't
* apparent without debug information because the offset of the
* data section is the same as the first section. However, with
* debug sections mixed in, the offset of the debug section
* increases but n_value still references the first section.
*/
if (seg_c.nsects < 1) {
log_msg("Not enough sections\n");
goto bail;
}
memcpy(&s, seg_buf, sizeof(struct section));
base_data_section = s.offset;
} else if (lc.cmd == LC_SEGMENT_64) {
uint8_t *seg_buf = buf;
struct section_64 s;
struct segment_command_64 seg_c;
memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
seg_buf += sizeof(struct segment_command_64);
/* Explanation in LG_SEGMENT */
if (seg_c.nsects < 1) {
log_msg("Not enough sections\n");
goto bail;
}
memcpy(&s, seg_buf, sizeof(struct section_64));
base_data_section = s.offset;
} else if (lc.cmd == LC_SYMTAB) {
if (base_data_section != 0) {
struct symtab_command sc;
uint8_t *sym_buf = base_buf;
uint8_t *str_buf = base_buf;
memcpy(&sc, buf, sizeof(struct symtab_command));
if (sc.cmdsize != sizeof(struct symtab_command)) {
log_msg("Can't find symbol table!\n");
goto bail;
}
sym_buf += sc.symoff;
str_buf += sc.stroff;
for (j = 0; j < sc.nsyms; j++) {
/* Location of string is cacluated each time from the
* start of the string buffer. On darwin the symbols
* are prefixed by "_", so we bump the pointer by 1.
* The target value is defined as an int in *_asm_*_offsets.c,
* which is 4 bytes on all targets we currently use.
*/
if (bits == 32) {
struct nlist nl;
int val;
memcpy(&nl, sym_buf, sizeof(struct nlist));
sym_buf += sizeof(struct nlist);
memcpy(&val, base_buf + base_data_section + nl.n_value,
sizeof(val));
print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
} else { /* if (bits == 64) */
struct nlist_64 nl;
int val;
memcpy(&nl, sym_buf, sizeof(struct nlist_64));
sym_buf += sizeof(struct nlist_64);
memcpy(&val, base_buf + base_data_section + nl.n_value,
sizeof(val));
print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
}
}
}
}
buf += lc.cmdsize;
}
return 0;
bail:
return 1;
}
#elif defined(__ELF__)
#include "elf.h"
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
if(ofst + sizeof((*(dst))) > sz) goto bail;\
memcpy(dst, buf+ofst, sizeof((*(dst))));\
} while(0)
#define ENDIAN_ASSIGN(val, memb) do {\
if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
(val) = (memb);\
} while(0)
#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
ENDIAN_ASSIGN(memb, memb);\
} while(0)
typedef struct {
uint8_t *buf; /* Buffer containing ELF data */
size_t sz; /* Buffer size */
int le_data; /* Data is little-endian */
unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
int bits; /* 32 or 64 */
Elf32_Ehdr hdr32;
Elf64_Ehdr hdr64;
} elf_obj_t;
int parse_elf_header(elf_obj_t *elf) {
int res;
/* Verify ELF Magic numbers */
COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
res = elf->e_ident[EI_MAG0] == ELFMAG0;
res &= elf->e_ident[EI_MAG1] == ELFMAG1;
res &= elf->e_ident[EI_MAG2] == ELFMAG2;
res &= elf->e_ident[EI_MAG3] == ELFMAG3;
res &= elf->e_ident[EI_CLASS] == ELFCLASS32
|| elf->e_ident[EI_CLASS] == ELFCLASS64;
res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
if (!res) goto bail;
elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
/* Read in relevant values */
if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
elf->bits = 32;
COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
} else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
elf->bits = 64;
COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
}
return 0;
bail:
log_msg("Failed to parse ELF file header");
return 1;
}
int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
if (hdr32) {
if (idx >= elf->hdr32.e_shnum)
goto bail;
COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
elf->sz);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
} else { /* if (hdr64) */
if (idx >= elf->hdr64.e_shnum)
goto bail;
COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
elf->sz);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
}
return 0;
bail:
return 1;
}
const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
if (elf->bits == 32) {
Elf32_Shdr shdr;
if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
log_msg("Failed to parse ELF string table: section %d, index %d\n",
s_idx, idx);
return "";
}
return (char *)(elf->buf + shdr.sh_offset + idx);
} else { /* if (elf->bits == 64) */
Elf64_Shdr shdr;
if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
log_msg("Failed to parse ELF string table: section %d, index %d\n",
s_idx, idx);
return "";
}
return (char *)(elf->buf + shdr.sh_offset + idx);
}
}
int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
if (sym32) {
COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
} else { /* if (sym64) */
COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
}
return 0;
bail:
return 1;
}
int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
elf_obj_t elf;
unsigned int ofst;
int i;
Elf32_Off strtab_off32;
Elf64_Off strtab_off64; /* save String Table offset for later use */
memset(&elf, 0, sizeof(elf));
elf.buf = buf;
elf.sz = sz;
/* Parse Header */
if (parse_elf_header(&elf))
goto bail;
if (elf.bits == 32) {
Elf32_Shdr shdr;
for (i = 0; i < elf.hdr32.e_shnum; i++) {
parse_elf_section(&elf, i, &shdr, NULL);
if (shdr.sh_type == SHT_STRTAB) {
char strtsb_name[128];
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
if (!(strcmp(strtsb_name, ".shstrtab"))) {
/* log_msg("found section: %s\n", strtsb_name); */
strtab_off32 = shdr.sh_offset;
break;
}
}
}
} else { /* if (elf.bits == 64) */
Elf64_Shdr shdr;
for (i = 0; i < elf.hdr64.e_shnum; i++) {
parse_elf_section(&elf, i, NULL, &shdr);
if (shdr.sh_type == SHT_STRTAB) {
char strtsb_name[128];
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
if (!(strcmp(strtsb_name, ".shstrtab"))) {
/* log_msg("found section: %s\n", strtsb_name); */
strtab_off64 = shdr.sh_offset;
break;
}
}
}
}
/* Parse all Symbol Tables */
if (elf.bits == 32) {
Elf32_Shdr shdr;
for (i = 0; i < elf.hdr32.e_shnum; i++) {
parse_elf_section(&elf, i, &shdr, NULL);
if (shdr.sh_type == SHT_SYMTAB) {
for (ofst = shdr.sh_offset;
ofst < shdr.sh_offset + shdr.sh_size;
ofst += shdr.sh_entsize) {
Elf32_Sym sym;
parse_elf_symbol(&elf, ofst, &sym, NULL);
/* For all OBJECTS (data objects), extract the value from the
* proper data segment.
*/
/* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
log_msg("found data object %s\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name));
*/
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
&& sym.st_size == 4) {
Elf32_Shdr dhdr;
int val = 0;
char section_name[128];
parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
/* For explanition - refer to _MSC_VER version of code */
strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
if (strcmp(section_name, ".bss")) {
if (sizeof(val) != sym.st_size) {
/* The target value is declared as an int in
* *_asm_*_offsets.c, which is 4 bytes on all
* targets we currently use. Complain loudly if
* this is not true.
*/
log_msg("Symbol size is wrong\n");
goto bail;
}
memcpy(&val,
elf.buf + dhdr.sh_offset + sym.st_value,
sym.st_size);
}
if (!elf.le_data) {
log_msg("Big Endian data not supported yet!\n");
goto bail;
}
switch (mode) {
case OUTPUT_FMT_RVDS:
printf("%-40s EQU %5d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
case OUTPUT_FMT_GAS:
printf(".equ %-40s, %5d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
case OUTPUT_FMT_C_HEADER:
printf("#define %-40s %5d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
default:
printf("%s = %d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
}
}
}
}
}
} else { /* if (elf.bits == 64) */
Elf64_Shdr shdr;
for (i = 0; i < elf.hdr64.e_shnum; i++) {
parse_elf_section(&elf, i, NULL, &shdr);
if (shdr.sh_type == SHT_SYMTAB) {
for (ofst = shdr.sh_offset;
ofst < shdr.sh_offset + shdr.sh_size;
ofst += shdr.sh_entsize) {
Elf64_Sym sym;
parse_elf_symbol(&elf, ofst, NULL, &sym);
/* For all OBJECTS (data objects), extract the value from the
* proper data segment.
*/
/* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
log_msg("found data object %s\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name));
*/
if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
&& sym.st_size == 4) {
Elf64_Shdr dhdr;
int val = 0;
char section_name[128];
parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
/* For explanition - refer to _MSC_VER version of code */
strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
if ((strcmp(section_name, ".bss"))) {
if (sizeof(val) != sym.st_size) {
/* The target value is declared as an int in
* *_asm_*_offsets.c, which is 4 bytes on all
* targets we currently use. Complain loudly if
* this is not true.
*/
log_msg("Symbol size is wrong\n");
goto bail;
}
memcpy(&val,
elf.buf + dhdr.sh_offset + sym.st_value,
sym.st_size);
}
if (!elf.le_data) {
log_msg("Big Endian data not supported yet!\n");
goto bail;
}
switch (mode) {
case OUTPUT_FMT_RVDS:
printf("%-40s EQU %5d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
case OUTPUT_FMT_GAS:
printf(".equ %-40s, %5d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
default:
printf("%s = %d\n",
parse_elf_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
}
}
}
}
}
}
if (mode == OUTPUT_FMT_RVDS)
printf(" END\n");
return 0;
bail:
log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
return 1;
}
#endif
#endif /* defined(__GNUC__) && __GNUC__ */
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
/* See "Microsoft Portable Executable and Common Object File Format Specification"
for reference.
*/
#define get_le32(x) ((*(x)) | (*(x+1)) << 8 |(*(x+2)) << 16 | (*(x+3)) << 24 )
#define get_le16(x) ((*(x)) | (*(x+1)) << 8)
int parse_coff(uint8_t *buf, size_t sz) {
unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
unsigned int sectionrawdata_ptr;
unsigned int i;
uint8_t *ptr;
uint32_t symoffset;
char **sectionlist; // this array holds all section names in their correct order.
// it is used to check if the symbol is in .bss or .rdata section.
nsections = get_le16(buf + 2);
symtab_ptr = get_le32(buf + 8);
symtab_sz = get_le32(buf + 12);
strtab_ptr = symtab_ptr + symtab_sz * 18;
if (nsections > 96) {
log_msg("Too many sections\n");
return 1;
}
sectionlist = malloc(nsections * sizeof(sectionlist));
if (sectionlist == NULL) {
log_msg("Allocating first level of section list failed\n");
return 1;
}
// log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
/*
The size of optional header is always zero for an obj file. So, the section header
follows the file header immediately.
*/
ptr = buf + 20; // section header
for (i = 0; i < nsections; i++) {
char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(sectionname, ptr, 8);
// log_msg("COFF: Parsing section %s\n",sectionname);
sectionlist[i] = malloc(strlen(sectionname) + 1);
if (sectionlist[i] == NULL) {
log_msg("Allocating storage for %s failed\n", sectionname);
goto bail;
}
strcpy(sectionlist[i], sectionname);
// check if it's .rdata and is not a COMDAT section.
if (!strcmp(sectionname, ".rdata") &&
(get_le32(ptr + 36) & 0x1000) == 0) {
sectionrawdata_ptr = get_le32(ptr + 20);
}
ptr += 40;
}
// log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
// log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
/* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
zero offset in .bss section. So, if the data in in .bss section, set offset=0.
Note from Wiki: In an object module compiled from C, the bss section contains
the local variables (but not functions) that were declared with the static keyword,
except for those with non-zero initial values. (In C, static variables are initialized
to zero by default.) It also contains the non-local (both extern and static) variables
that are also initialized to zero (either explicitly or by default).
*/
// move to symbol table
/* COFF symbol table:
offset field
0 Name(*)
8 Value
12 SectionNumber
14 Type
16 StorageClass
17 NumberOfAuxSymbols
*/
ptr = buf + symtab_ptr;
for (i = 0; i < symtab_sz; i++) {
int16_t section = get_le16(ptr + 12); // section number
if (section > 0 && ptr[16] == 2) {
// if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
if (get_le32(ptr)) {
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(name, ptr, 8);
// log_msg("COFF: Parsing symbol %s\n",name);
/* The 64bit Windows compiler doesn't prefix with an _.
* Check what's there, and bump if necessary
*/
if (name[0] == '_')
printf("%-40s EQU ", name + 1);
else
printf("%-40s EQU ", name);
} else {
// log_msg("COFF: Parsing symbol %s\n",
// buf + strtab_ptr + get_le32(ptr+4));
if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
printf("%-40s EQU ",
buf + strtab_ptr + get_le32(ptr + 4) + 1);
else
printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
}
if (!(strcmp(sectionlist[section - 1], ".bss"))) {
symoffset = 0;
} else {
symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
}
// log_msg(" Section: %d\n",section);
// log_msg(" Class: %d\n",ptr[16]);
// log_msg(" Address: %u\n",get_le32(ptr+8));
// log_msg(" Offset: %u\n", symoffset);
printf("%5d\n", symoffset);
}
ptr += 18;
}
printf(" END\n");
for (i = 0; i < nsections; i++) {
free(sectionlist[i]);
}
free(sectionlist);
return 0;
bail:
for (i = 0; i < nsections; i++) {
free(sectionlist[i]);
}
free(sectionlist);
return 1;
}
#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
int main(int argc, char **argv) {
output_fmt_t mode = OUTPUT_FMT_PLAIN;
const char *f;
uint8_t *file_buf;
int res;
FILE *fp;
long int file_size;
if (argc < 2 || argc > 3) {
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
fprintf(stderr, " <obj file>\tobject file to parse\n");
fprintf(stderr, "Output Formats:\n");
fprintf(stderr, " gas - compatible with GNU assembler\n");
fprintf(stderr, " rvds - compatible with armasm\n");
fprintf(stderr, " cheader - c/c++ header file\n");
goto bail;
}
f = argv[2];
if (!strcmp(argv[1], "rvds"))
mode = OUTPUT_FMT_RVDS;
else if (!strcmp(argv[1], "gas"))
mode = OUTPUT_FMT_GAS;
else if (!strcmp(argv[1], "cheader"))
mode = OUTPUT_FMT_C_HEADER;
else
f = argv[1];
fp = fopen(f, "rb");
if (!fp) {
perror("Unable to open file");
goto bail;
}
if (fseek(fp, 0, SEEK_END)) {
perror("stat");
goto bail;
}
file_size = ftell(fp);
file_buf = malloc(file_size);
if (!file_buf) {
perror("malloc");
goto bail;
}
rewind(fp);
if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
perror("read");
goto bail;
}
if (fclose(fp)) {
perror("close");
goto bail;
}
#if defined(__GNUC__) && __GNUC__
#if defined(__MACH__)
res = parse_macho(file_buf, file_size, mode);
#elif defined(__ELF__)
res = parse_elf(file_buf, file_size, mode);
#endif
#endif
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
res = parse_coff(file_buf, file_size);
#endif
free(file_buf);
if (!res)
return EXIT_SUCCESS;
bail:
return EXIT_FAILURE;
}
@@ -0,0 +1,32 @@
From: Jacek Caban <jacek@codeweavers.com>
Don't use pthread for libvpx in mingw builds.
diff --git a/media/libvpx/vpx_config_x86-win32-gcc.h b/media/libvpx/vpx_config_x86-win32-gcc.h
index 5bc3efb..e60f84d 100644
--- a/media/libvpx/vpx_config_x86-win32-gcc.h
+++ b/media/libvpx/vpx_config_x86-win32-gcc.h
@@ -32,7 +32,8 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_PTHREAD_H 1
+#undef HAVE_PTHREAD_H
+#define HAVE_PTHREAD_H 0
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
diff --git a/media/libvpx/vpx_config_x86_64-win64-gcc.h b/media/libvpx/vpx_config_x86_64-win64-gcc.h
index 4ff4339..b056a0e 100644
--- a/media/libvpx/vpx_config_x86_64-win64-gcc.h
+++ b/media/libvpx/vpx_config_x86_64-win64-gcc.h
@@ -32,7 +32,8 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_PTHREAD_H 1
+#undef HAVE_PTHREAD_H
+#define HAVE_PTHREAD_H 0
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
-24
View File
@@ -1,24 +0,0 @@
diff --git a/media/libvpx/vp9/common/vp9_systemdependent.h b/media/libvpx/vp9/common/vp9_systemdependent.h
--- a/media/libvpx/vp9/common/vp9_systemdependent.h
+++ b/media/libvpx/vp9/common/vp9_systemdependent.h
@@ -12,17 +12,19 @@
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#ifdef _MSC_VER
# include <math.h> // the ceil() definition must precede intrin.h
# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
# include <intrin.h>
# define USE_MSC_INTRIN
# endif
-# define snprintf _snprintf
+# if _MSC_VER < 1900
+# define snprintf _snprintf
+# endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
#include "./vpx_config.h"
#if ARCH_X86 || ARCH_X86_64
+3 -3
View File
@@ -6,9 +6,9 @@
+#if !defined(VPX_DONT_DEFINE_STDINT_TYPES)
+
#if defined(_MSC_VER)
#define VPX_FORCE_INLINE __forceinline
#define VPX_INLINE __inline
#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES)
typedef signed char int8_t;
typedef signed short int16_t;
@@ -56,6 +58,8 @@
#endif
+15 -2
View File
@@ -36,7 +36,9 @@
%include "vpx_config.asm"
%ifndef program_name
%define program_name vp9
%endif
%define UNIX64 0
@@ -78,6 +80,9 @@
%macro SECTION_RODATA 0-1 16
%ifidn __OUTPUT_FORMAT__,macho64
SECTION .text align=%1
%elifidn __OUTPUT_FORMAT__,macho32
SECTION .text align=%1
fakegot:
%elifidn __OUTPUT_FORMAT__,macho
SECTION .text align=%1
fakegot:
@@ -617,9 +622,17 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%elifidn __OUTPUT_FORMAT__,elf64
global %1:function hidden
%elifidn __OUTPUT_FORMAT__,macho32
global %1:private_extern
%ifdef __NASM_VER__
global %1
%else
global %1:private_extern
%endif
%elifidn __OUTPUT_FORMAT__,macho64
global %1:private_extern
%ifdef __NASM_VER__
global %1
%else
global %1:private_extern
%endif
%else
global %1
%endif
+100 -44
View File
@@ -27,12 +27,13 @@ PLATFORMS= [
mk_files = [
'vp8/vp8_common.mk',
'vp8/vp8cx_arm.mk',
'vp8/vp8cx.mk',
'vp8/vp8dx.mk',
'vp8/vp8cx_arm.mk',
'vp9/vp9_common.mk',
'vp9/vp9cx.mk',
'vp9/vp9dx.mk',
'vpx_dsp/vpx_dsp.mk',
'vpx_mem/vpx_mem.mk',
'vpx_ports/vpx_ports.mk',
'vpx_scale/vpx_scale.mk',
@@ -42,12 +43,14 @@ mk_files = [
extensions = ['.asm', '.c', '.h']
MODULES = {
'UNIFIED_SOURCES': [
'SOURCES': [
'API_DOC_SRCS-$(CONFIG_VP8_DECODER)',
'API_DOC_SRCS-yes',
'API_EXPORTS',
'API_SRCS-$(CONFIG_VP8_DECODER)',
'API_SRCS-yes',
'DSP_SRCS-yes',
'DSP_SRCS-yes+$(CONFIG_ENCODERS)',
'MEM_SRCS-yes',
'PORTS_SRCS-yes',
'SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING)',
@@ -77,6 +80,7 @@ MODULES = {
'VP8_CX_SRCS-no',
'VP8_CX_SRCS_REMOVE-no',
'VP8_CX_SRCS_REMOVE-yes',
'VP8_CX_SRCS_REMOVE-yes+$(CONFIG_REALTIME_ONLY)',
'VP8_CX_SRCS-yes',
'VP9_CX_EXPORTS',
'VP9_CX_SRCS-no',
@@ -85,35 +89,59 @@ MODULES = {
'VP9_CX_SRCS-yes',
],
'X86_ASM': [
'DSP_SRCS-$(HAVE_MMX)',
'DSP_SRCS-$(HAVE_MMX)+$(CONFIG_ENCODERS)',
'DSP_SRCS-$(HAVE_SSE2)',
'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_ENCODERS)',
'DSP_SRCS-$(HAVE_SSE3)+$(CONFIG_ENCODERS)',
'DSP_SRCS-$(HAVE_SSE4_1)+$(CONFIG_ENCODERS)',
'DSP_SRCS-$(HAVE_SSSE3)+$(CONFIG_ENCODERS)',
'PORTS_SRCS-$(BUILD_LIBVPX)',
'PORTS_SRCS-$(BUILD_LIBVPX)+$(ARCH_X86)$(ARCH_X86_64)',
'PORTS_SRCS-yes+$(ARCH_X86)$(ARCH_X86_64)',
'VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
'VP8_COMMON_SRCS-$(HAVE_MMX)',
'VP8_COMMON_SRCS-$(HAVE_MMX)+$(CONFIG_POSTPROC)',
'VP8_COMMON_SRCS-$(HAVE_SSE2)',
'VP8_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_POSTPROC)',
'VP8_COMMON_SRCS-$(HAVE_SSE3)',
'VP8_COMMON_SRCS-$(HAVE_SSE4_1)',
'VP8_COMMON_SRCS-$(HAVE_SSSE3)',
'VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
'VP9_COMMON_SRCS-$(HAVE_MMX)',
'VP9_COMMON_SRCS-$(HAVE_SSE2)',
'VP9_COMMON_SRCS-$(HAVE_SSSE3)',
'VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
'VP8_CX_SRCS-$(HAVE_MMX)',
'VP8_CX_SRCS-$(HAVE_SSE2)',
'VP8_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_TEMPORAL_DENOISING)',
'VP8_CX_SRCS-$(HAVE_SSE4_1)',
'VP8_CX_SRCS-$(HAVE_SSSE3)',
'VP8_CX_SRCS_REMOVE-$(HAVE_SSE2)',
'VP8_CX_SRCS_REMOVE-$(HAVE_SSE2)+$(CONFIG_REALTIME_ONLY)',
'VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
'VP9_COMMON_SRCS-$(HAVE_MMX)',
'VP9_COMMON_SRCS-$(HAVE_SSE2)',
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_USE_X86INC)',
'VP9_COMMON_SRCS-$(HAVE_SSSE3)',
'VP9_COMMON_SRCS-$(HAVE_SSSE3)+$(CONFIG_USE_X86INC)',
'VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)',
'VP9_CX_SRCS-$(HAVE_MMX)',
'VP9_CX_SRCS-$(HAVE_MMX)+$(CONFIG_USE_X86INC)',
'VP9_CX_SRCS-$(HAVE_SSE2)',
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_USE_X86INC)',
'VP9_CX_SRCS-$(HAVE_SSE3)',
'VP9_CX_SRCS-$(HAVE_SSE4_1)',
'VP9_CX_SRCS-$(HAVE_SSSE3)',
],
'X86-64_ASM': [
'VP8_COMMON_SRCS-$(HAVE_SSE2)+$(ARCH_X86_64)',
'VP8_CX_SRCS-$(ARCH_X86_64)',
'VP9_COMMON_SRCS-$(HAVE_SSSE3)+$(ARCH_X86_64)',
'VP9_CX_SRCS-$(ARCH_X86_64)',
'VP9_CX_SRCS-$(HAVE_SSSE3)+$(ARCH_X86_64)',
],
'ARM_ASM': [
'DSP_SRCS-$(HAVE_MEDIA)',
'DSP_SRCS-$(HAVE_MEDIA)+$(CONFIG_ENCODERS)',
'DSP_SRCS-$(HAVE_NEON)',
'DSP_SRCS-$(HAVE_NEON)+$(CONFIG_ENCODERS)',
'PORTS_SRCS-$(ARCH_ARM)',
'SCALE_SRCS-$(HAVE_NEON)',
'VP8_COMMON_SRCS-$(ARCH_ARM)',
@@ -121,6 +149,7 @@ MODULES = {
'VP8_COMMON_SRCS-$(HAVE_NEON)',
'VP9_COMMON_SRCS-$(HAVE_NEON)',
'VP9_COMMON_SRCS-$(HAVE_NEON_ASM)',
'VP9_COMMON_SRCS-yes+$(HAVE_NEON_ASM)',
'VP8_CX_SRCS-$(ARCH_ARM)',
'VP8_CX_SRCS-$(HAVE_EDSP)',
'VP8_CX_SRCS-$(HAVE_MEDIA)',
@@ -132,6 +161,8 @@ MODULES = {
'VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT)',
],
'AVX2': [
'DSP_SRCS-$(HAVE_AVX2)',
'DSP_SRCS-$(HAVE_AVX2)+$(CONFIG_ENCODERS)',
'VP9_COMMON_SRCS-$(HAVE_AVX2)',
'VP9_CX_SRCS-$(HAVE_AVX2)',
],
@@ -140,28 +171,43 @@ MODULES = {
],
'VP9_POSTPROC': [
'VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC)',
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_POSTPROC)',
]
}
DISABLED_MODULES = [
'API_SRCS-$(CONFIG_SPATIAL_SVC)',
'API_SRCS-$(CONFIG_SPATIAL_SVC)+$(CONFIG_VP9_ENCODER)',
'MEM_SRCS-$(CONFIG_MEM_MANAGER)',
'MEM_SRCS-$(CONFIG_MEM_TRACKER)',
'VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)',
'VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER)',
'VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS)',
'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)',
'VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS)+$(CONFIG_VP9_POSTPROC)',
'VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING)',
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_TEMPORAL_DENOISING)',
# VP9_HIGHBITDEPTH
'DSP_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
'VP9_COMMON_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
'VP9_CX_SRCS-$(HAVE_SSE2)+$(CONFIG_VP9_HIGHBITDEPTH)',
# use asm implementations instead of intrinsics
# neon exists as assembly and intrinsics implementations.
# If both are available prefer assembly (HAVE_NEON_ASM)
'VP9_COMMON_SRCS-yes+$(HAVE_NEON)',
# mips files are also ignored via ignored_folders
'SCALE_SRCS-$(HAVE_DSPR2)',
'VP8_COMMON_SRCS-$(HAVE_DSPR2)',
'VP9_COMMON_SRCS-$(HAVE_DSPR2)',
'VP8_CX_SRCS_REMOVE-$(HAVE_EDSP)',
'VP9_COMMON_SRCS-$(HAVE_MSA)',
'VP9_CX_SRCS-$(HAVE_MSA)',
]
libvpx_files = [
'build/make/obj_int_extract.c',
'build/make/ads2gas.pl',
'build/make/thumb.pm',
'LICENSE',
@@ -195,6 +241,16 @@ ignore_folders = [
'test/',
'vpx_mem/memory_manager/',
]
rename_files = {
#avoid clash between vpx_dsp/x86 and vp8/common/x86
'vp8/common/x86/variance_mmx.c': 'vp8/common/x86/vp8_variance_mmx.c',
'vp8/common/x86/variance_sse2.c': 'vp8/common/x86/vp8_variance_sse2.c',
'vp8/common/x86/variance_impl_mmx.asm': 'vp8/common/x86/vp8_variance_impl_mmx.asm',
#avoid clash with common/arm/neon/vp9_avg_neon.c
'vp9/encoder/arm/neon/vp9_avg_neon.c': 'vp9/encoder/arm/neon/vp9enc_avg_neon.c',
}
files = {
'EXPORTS': [
'vpx_mem/include/vpx_mem_intrnl.h',
@@ -217,12 +273,9 @@ files = {
],
'X86-64_ASM': [
'third_party/x86inc/x86inc.asm',
'vp8/common/x86/loopfilter_block_sse2_x86_64.asm',
'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
],
'SOURCES': [
'vp8/common/rtcd.c',
'vp8/common/sad_c.c',
'vp8/encoder/bitstream.c',
'vp8/encoder/onyx_if.c',
'vp8/vp8_dx_iface.c',
@@ -249,26 +302,18 @@ files = {
'vp9/common/vp9_scale.c',
'vp9/common/vp9_scan.c',
'vp9/common/vp9_seg_common.c',
'vp9/common/vp9_thread.c',
'vp9/common/vp9_tile_common.c',
'vp9/decoder/vp9_decodeframe.c',
'vp9/decoder/vp9_decodemv.c',
'vp9/decoder/vp9_decoder.c',
'vp9/decoder/vp9_detokenize.c',
'vp9/decoder/vp9_dsubexp.c',
'vp9/decoder/vp9_dthread.c',
'vp9/decoder/vp9_reader.c',
'vp9/encoder/vp9_bitstream.c',
'vp9/encoder/vp9_aq_complexity.c',
'vp9/encoder/vp9_aq_cyclicrefresh.c',
'vp9/encoder/vp9_aq_variance.c',
'vp9/encoder/vp9_context_tree.c',
'vp9/encoder/vp9_cost.c',
'vp9/encoder/vp9_dct.c',
'vp9/encoder/vp9_encodeframe.c',
'vp9/encoder/vp9_encodemb.c',
'vp9/encoder/vp9_encodemv.c',
'vp9/encoder/vp9_encoder.c',
'vp9/encoder/vp9_extend.c',
'vp9/encoder/vp9_firstpass.c',
'vp9/encoder/vp9_lookahead.c',
@@ -278,23 +323,17 @@ files = {
'vp9/encoder/vp9_pickmode.c',
'vp9/encoder/vp9_quantize.c',
'vp9/encoder/vp9_ratectrl.c',
'vp9/encoder/vp9_rd.c',
'vp9/encoder/vp9_rdopt.c',
'vp9/encoder/vp9_resize.c',
'vp9/encoder/vp9_sad.c',
'vp9/encoder/vp9_segmentation.c',
'vp9/encoder/vp9_speed_features.c',
'vp9/encoder/vp9_subexp.c',
'vp9/encoder/vp9_svc_layercontext.c',
'vp9/encoder/vp9_temporal_filter.c',
'vp9/encoder/vp9_tokenize.c',
'vp9/encoder/vp9_treewriter.c',
'vp9/encoder/vp9_variance.c',
'vp9/encoder/vp9_write_bit_buffer.c',
'vp9/encoder/vp9_writer.c',
'vp9/vp9_cx_iface.c',
'vp9/vp9_dx_iface.c',
'vpx/src/svc_encodeframe.c',
'vpx/src/vpx_encoder.c',
'vpx_mem/vpx_mem.c',
'vpx_scale/vpx_scale_rtcd.c',
@@ -307,9 +346,9 @@ manual = [
# special case in moz.build
'vp8/encoder/boolhuff.c',
# 64bit only
'vp8/common/x86/loopfilter_block_sse2_x86_64.asm',
'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
# These 64-bit only files end up in X86_ASM. Filter them out.
'vp8/common/x86/loopfilter_block_sse2.asm',
'vp9/encoder/x86/vp9_quantize_ssse3.asm',
# offsets are special cased in Makefile.in
'vp8/encoder/vp8_asm_enc_offsets.c',
@@ -333,27 +372,28 @@ platform_files = [
'vpx_config.asm',
'vpx_config.h',
'vpx_scale_rtcd.h',
'vpx_dsp_rtcd.h',
]
def prepare_upstream(prefix, commit=None):
upstream_url = 'https://chromium.googlesource.com/webm/libvpx'
if os.path.exists(prefix):
print "Please remove '%s' folder before running %s" % (prefix, sys.argv[0])
sys.exit(1)
upstream_url = 'https://gerrit.chromium.org/gerrit/webm/libvpx'
subprocess.call(['git', 'clone', upstream_url, prefix])
if commit:
os.chdir(prefix)
subprocess.call(['git', 'fetch', 'origin'])
else:
subprocess.call(['git', 'clone', upstream_url, prefix])
os.chdir(prefix)
if commit:
subprocess.call(['git', 'checkout', commit])
else:
os.chdir(prefix)
p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
stdout, stderr = p.communicate()
commit = stdout.strip()
for target in PLATFORMS:
target_objdir = os.path.join(prefix, 'objdir', target)
os.makedirs(target_objdir)
if not os.path.exists(target_objdir):
os.makedirs(target_objdir)
os.chdir(target_objdir)
configure = ['../../configure', '--target=%s' % target,
'--disable-examples', '--disable-install-docs',
@@ -372,10 +412,14 @@ def prepare_upstream(prefix, commit=None):
if target == 'armv7-android-gcc':
configure += ['--sdk-path=%s' % ndk_path]
print "\n" + target_objdir
print " ".join(configure)
sys.stdout.flush()
subprocess.call(configure)
make_targets = [f for f in platform_files if not os.path.exists(f)]
if make_targets:
print " ".join(['make'] + make_targets)
sys.stdout.flush()
subprocess.call(['make'] + make_targets)
for f in make_targets:
if not os.path.exists(f):
@@ -386,7 +430,7 @@ def prepare_upstream(prefix, commit=None):
return commit
def cleanup_upstream():
shutil.rmtree(os.path.join(base, 'upstream'))
shutil.rmtree(os.path.join(base, 'upstream/objdir'))
def get_module(key):
for module in MODULES:
@@ -412,10 +456,17 @@ def get_sources(prefix):
for mk in mk_files:
with open(os.path.join(prefix, mk)) as f:
base = os.path.dirname(mk)
extra = ''
for l in f:
m = re.compile('ifeq +\((.*?\)), *yes\)').findall(l)
if m:
extra = '+' + m[0]
if extra and l.startswith('else') or l.startswith('endif'):
extra = ''
if '+=' in l:
l = l.split('+=')
key = l[0].strip()
key += extra
value = l[1].strip().replace('$(ASM)', '.asm')
value = os.path.join(base, value)
if not key.startswith('#') and os.path.splitext(value)[-1] in extensions:
@@ -425,6 +476,7 @@ def get_sources(prefix):
for key in source:
for f in source[key]:
f = rename_files.get(f, f)
if key.endswith('EXPORTS') and f.endswith('.h'):
files['EXPORTS'].append(f)
if os.path.splitext(f)[-1] in ('.c', '.asm') and not f in manual:
@@ -443,8 +495,6 @@ def get_sources(prefix):
t = unknown[key]
t.append(f)
files['UNIFIED_SOURCES'] = [f for f in files['UNIFIED_SOURCES'] if f not in files['SOURCES']]
for key in files:
files[key] = list(sorted(set(files[key])))
@@ -495,6 +545,7 @@ def update_and_remove_files(prefix, libvpx_files, files):
if fdir and not os.path.exists(fdir):
os.makedirs(fdir)
s = os.path.join(prefix, f)
f = rename_files.get(f, f)
if is_new(f, s):
if first:
print "Copy files:"
@@ -521,7 +572,11 @@ def update_and_remove_files(prefix, libvpx_files, files):
copy(s, f)
# Remove unknown files from tree
removed_files = [f for f in current_files if f not in libvpx_files]
removed_files = [f for f in current_files if f not in libvpx_files and f not in rename_files.values()]
for f in rename_files:
if os.path.exists(f) and os.path.exists(rename_files[f]) and not f in removed_files:
removed_files.append(f)
if removed_files:
print "Remove files:"
for f in removed_files:
@@ -531,12 +586,13 @@ def update_and_remove_files(prefix, libvpx_files, files):
def apply_patches():
# Patch to permit vpx users to specify their own <stdint.h> types.
os.system("patch -p0 < stdint.patch")
# Patch to allow older versions of Apple's clang to build libvpx.
os.system("patch -p3 < apple-clang.patch")
# Patch to allow MSVC 2015 to compile libvpx
os.system("patch -p3 < msvc2015.patch")
# Patch to fix a crash caused by MSVC 2013
os.system("patch -p3 < bug1137614.patch")
# Bug 1176730 - Don't use pthread for libvpx in mingw builds.
os.system("patch -p3 < disable_pthread_on_mingw.patch")
# Cherry pick https://chromium-review.googlesource.com/#/c/276889/
# to fix crash on 32bit
os.system("patch -p1 < vp9_filter_restore_aligment.patch")
def update_readme(commit):
with open('README_MOZILLA') as f:
+5 -4
View File
@@ -10,6 +10,7 @@
#include "vpx_config.h"
#include "alloccommon.h"
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
#include "onyxc_int.h"
@@ -103,9 +104,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
goto allocation_fail;
oci->post_proc_buffer_int_used = 0;
vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
oci->post_proc_buffer.frame_size);
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
memset(oci->post_proc_buffer.buffer_alloc, 128,
oci->post_proc_buffer.frame_size);
/* Allocate buffer to store post-processing filter coefficients.
*
@@ -176,7 +177,7 @@ void vp8_create_common(VP8_COMMON *oci)
oci->clamp_type = RECON_CLAMP_REQUIRED;
/* Initialize reference frame sign bias structure to defaults */
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
/* Default disable buffer to buffer copying */
oci->copy_buffer_to_gf = 0;
@@ -165,7 +165,7 @@ vp8_dequant_idct_loop2_v6
str r1, [r2], r12 ; store output to dst
bne vp8_dequant_idct_loop2_v6
; vpx_memset
; memset
sub r0, r0, #32
add sp, sp, #4
@@ -1,154 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_variance16x16_armv6|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance16x16_armv6| PROC
stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0
mov r11, #0 ; initialize sse = 0
mov r12, #16 ; set loop counter to 16 (=block height)
loop
; 1st 4 pixels
ldr r4, [r0, #0] ; load 4 src pixels
ldr r5, [r2, #0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r4, [r0, #4] ; load 4 src pixels
ldr r5, [r2, #4] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 3rd 4 pixels
ldr r4, [r0, #8] ; load 4 src pixels
ldr r5, [r2, #8] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 4th 4 pixels
ldr r4, [r0, #12] ; load 4 src pixels
ldr r5, [r2, #12] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
add r2, r2, r3 ; set dst_ptr to next row
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
subs r12, r12, #1
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
END
@@ -1,101 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_variance8x8_armv6|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance8x8_armv6| PROC
push {r4-r10, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #8 ; set loop counter to 8 (=block height)
mov r4, #0 ; initialize sum = 0
mov r5, #0 ; initialize sse = 0
loop
; 1st 4 pixels
ldr r6, [r0, #0x0] ; load 4 src pixels
ldr r7, [r2, #0x0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r8, r6, r7 ; calculate difference
pld [r0, r1, lsl #1]
sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r6, r10, lr ; calculate sum of positive differences
usad8 r7, r8, lr ; calculate sum of negative differences
orr r8, r8, r10 ; differences of all 4 pixels
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r6, [r0, #0x4] ; load 4 src pixels
ldr r7, [r2, #0x4] ; load 4 ref pixels
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
usub8 r8, r6, r7 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands
add r2, r2, r3 ; set dst_ptr to next row
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r6, r10, lr ; calculate sum of positive differences
usad8 r7, r8, lr ; calculate sum of negative differences
orr r8, r8, r10 ; differences of all 4 pixels
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
subs r12, r12, #1 ; next row
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
bne loop
; return stuff
ldr r8, [sp, #32] ; get address of sse
mul r1, r4, r4 ; sum * sum
str r5, [r8] ; store sse
sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
pop {r4-r10, pc}
ENDP
END
+3 -3
View File
@@ -99,7 +99,7 @@ void vp8_sixtap_predict4x4_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
@@ -147,7 +147,7 @@ void vp8_sixtap_predict8x8_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
@@ -189,7 +189,7 @@ void vp8_sixtap_predict16x16_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
-184
View File
@@ -1,184 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
unsigned int vp8_sad8x8_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 7; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad8x16_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 15; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad4x4_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x2_t d1;
uint64x1_t d3;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 3; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
d1 = vpaddl_u16(vget_low_u16(q12));
d3 = vpaddl_u32(d1);
return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
}
unsigned int vp8_sad16x16_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x16_t q0, q4;
uint16x8_t q12, q13;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
for (i = 0; i < 15; i++) {
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
}
q12 = vaddq_u16(q12, q13);
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad16x8_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x16_t q0, q4;
uint16x8_t q12, q13;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
for (i = 0; i < 7; i++) {
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
}
q12 = vaddq_u16(q12, q13);
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
@@ -1,320 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "vpx_ports/mem.h"
unsigned int vp8_variance16x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) {
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance16x8_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 4; i++) { // variance16x8_neon_loop
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance8x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
uint8x8_t d0u8, d2u8, d4u8, d6u8;
int16x4_t d22s16, d23s16, d24s16, d25s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint16x8_t q11u16, q12u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) { // variance8x16_neon_loop
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d2u8, d6u8);
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance8x8_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 2; i++) { // variance8x8_neon_loop
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d1u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d3u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d5u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d7u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);
q13u16 = vsubl_u8(d2u8, d6u8);
q14u16 = vsubl_u8(d3u8, d7u8);
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
@@ -32,7 +32,7 @@ unsigned int vp8_sub_pixel_variance16x16_neon_func(
int dst_pixels_per_line,
unsigned int *sse) {
int i;
DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528);
DECLARE_ALIGNED(16, unsigned char, tmp[528]);
unsigned char *tmpp;
unsigned char *tmpp2;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
@@ -911,12 +911,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_neon(
return vget_lane_u32(d0u32, 0);
}
enum { kWidth8 = 8 };
enum { kHeight8 = 8 };
enum { kHeight8PlusOne = 9 };
enum { kPixelStepOne = 1 };
enum { kAlign16 = 16 };
#define FILTER_BITS 7
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
@@ -968,8 +962,8 @@ static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
unsigned int *sse) {
int sum;
variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
return *sse - (((int64_t)sum * sum) / (8 * 8));
}
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
@@ -1003,22 +997,21 @@ unsigned int vp8_sub_pixel_variance8x8_neon(
const unsigned char *dst,
int dst_stride,
unsigned int *sse) {
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8);
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8);
DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]);
DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
if (xoffset == 0) {
var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8,
kWidth8, bilinear_taps_coeff[yoffset]);
var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8,
8, bilinear_taps_coeff[yoffset]);
} else if (yoffset == 0) {
var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne,
kHeight8PlusOne, kWidth8,
var_filter_block2d_bil_w8(src, temp2, src_stride, 1,
9, 8,
bilinear_taps_coeff[xoffset]);
} else {
var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne,
kHeight8PlusOne, kWidth8,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
bilinear_taps_coeff[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8,
kWidth8, bilinear_taps_coeff[yoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
8, bilinear_taps_coeff[yoffset]);
}
return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse);
return variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
+12 -7
View File
@@ -9,10 +9,14 @@
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/variance.h"
#include "vp8/common/filter.h"
// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder
#if CONFIG_VP8_ENCODER
#if HAVE_MEDIA
#include "vp8/common/arm/bilinearfilter_arm.h"
@@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
8, 8, 8, VFilter);
return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
dst_pixels_per_line, sse);
return vpx_variance8x8_media(second_pass, 8, dst_ptr,
dst_pixels_per_line, sse);
}
unsigned int vp8_sub_pixel_variance16x16_armv6
@@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
16, 16, 16, VFilter);
var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
dst_pixels_per_line, sse);
var = vpx_variance16x16_media(second_pass, 16, dst_ptr,
dst_pixels_per_line, sse);
}
return var;
}
#endif /* HAVE_MEDIA */
#endif // HAVE_MEDIA
#if HAVE_NEON
@@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
}
#endif
#endif // HAVE_NEON
#endif // CONFIG_VP8_ENCODER
+4
View File
@@ -187,8 +187,12 @@ typedef struct
{
FRAME_TYPE frame_type;
int is_frame_dropped;
// The frame rate for the lowest resolution.
double low_res_framerate;
/* The frame number of each reference frames */
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif
+4 -4
View File
@@ -29,19 +29,19 @@ extern "C" {
#define vp8_copy( Dest, Src) { \
assert( sizeof( Dest) == sizeof( Src)); \
vpx_memcpy( Dest, Src, sizeof( Src)); \
memcpy( Dest, Src, sizeof( Src)); \
}
/* Use this for variably-sized arrays. */
#define vp8_copy_array( Dest, Src, N) { \
assert( sizeof( *Dest) == sizeof( *Src)); \
vpx_memcpy( Dest, Src, N * sizeof( *Src)); \
memcpy( Dest, Src, N * sizeof( *Src)); \
}
#define vp8_zero( Dest) vpx_memset( &Dest, 0, sizeof( Dest));
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest));
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
#ifdef __cplusplus
+32
View File
@@ -0,0 +1,32 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
for (r = 0; r < height; r++)
{
memcpy(dst_ptr, src_ptr, 32);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
-2
View File
@@ -81,7 +81,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
/* print out the block modes */
mb_index = 0;
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
int b_row;
@@ -129,7 +128,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
/* print out the block modes */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
{
int b_row;
+1 -1
View File
@@ -38,6 +38,6 @@ void vp8_dequant_idct_add_c(short *input, short *dq,
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
vpx_memset(input, 0, 32);
memset(input, 0, 32);
}
+1 -2
View File
@@ -183,7 +183,6 @@ const vp8_extra_bit_struct vp8_extra_bits[12] =
void vp8_default_coef_probs(VP8_COMMON *pc)
{
vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
sizeof(default_coef_probs));
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
}
+4 -4
View File
@@ -159,13 +159,13 @@ const vp8_tree_index vp8_small_mvtree [14] =
void vp8_init_mbmode_probs(VP8_COMMON *x)
{
vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
}
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
{
vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
}
+5 -5
View File
@@ -40,9 +40,9 @@ static void copy_and_extend_plane
for (i = 0; i < h; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
vpx_memset(dest_ptr2, src_ptr2[0], er);
memset(dest_ptr1, src_ptr1[0], el);
memcpy(dest_ptr1 + el, src_ptr1, w);
memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += dp;
@@ -60,13 +60,13 @@ static void copy_and_extend_plane
for (i = 0; i < et; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += dp;
}
for (i = 0; i < eb; i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += dp;
}
}
+1
View File
@@ -10,6 +10,7 @@
#include "filter.h"
#include "./vp8_rtcd.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
@@ -17,6 +17,7 @@
#include "vpx_ports/x86.h"
#endif
#include "vp8/common/onyxc_int.h"
#include "vp8/common/systemdependent.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H && !defined(__OS2__)
+3 -3
View File
@@ -33,7 +33,7 @@ void vp8_dequant_idct_add_y_block_c
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
@@ -59,7 +59,7 @@ void vp8_dequant_idct_add_uv_block_c
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
@@ -78,7 +78,7 @@ void vp8_dequant_idct_add_uv_block_c
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
+1
View File
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
/****************************************************************************
* Notes:
+8 -8
View File
@@ -82,11 +82,10 @@ void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
if (block_inside_limit < 1)
block_inside_limit = 1;
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
SIMD_WIDTH);
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
SIMD_WIDTH);
memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
SIMD_WIDTH);
}
}
@@ -105,7 +104,7 @@ void vp8_loop_filter_init(VP8_COMMON *cm)
/* init hev threshold const vectors */
for(i = 0; i < 4 ; i++)
{
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}
}
@@ -142,8 +141,8 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
else /* Delta Value */
{
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
if (!mbd->mode_ref_lf_delta_enabled)
@@ -151,7 +150,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
/* we could get rid of this if we assume that deltas are set to
* zero when not in use; encoder always uses deltas
*/
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
continue;
}
@@ -261,6 +260,7 @@ void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
int mb_col;
int filter_level;
loop_filter_info_n *lfi_n = &cm->lf_info;
(void)post_uvstride;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
+24 -23
View File
@@ -17,10 +17,11 @@
* higher quality.
*/
#include "postproc.h"
#include "variance.h"
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/postproc.h"
#include "vp8/common/variance.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8_rtcd.h"
#include "vpx_scale/yv12config.h"
#include <limits.h>
@@ -150,36 +151,36 @@ static void multiframe_quality_enhance_block
if (blksize == 16)
{
actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128)>>8;
usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32)>>6;
vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
#endif
}
else /* if (blksize == 8) */
{
actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32)>>6;
usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8)>>4;
vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
@@ -231,9 +232,9 @@ static void multiframe_quality_enhance_block
{
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
vpx_memcpy(udp, up, uvblksize);
memcpy(udp, up, uvblksize);
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
vpx_memcpy(vdp, vp, uvblksize);
memcpy(vdp, vp, uvblksize);
}
}
}
@@ -341,8 +342,8 @@ void vp8_multiframe_quality_enhance
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
vp += show->uv_stride, vdp += dest->uv_stride)
{
vpx_memcpy(udp, up, 4);
vpx_memcpy(vdp, vp, 4);
memcpy(udp, up, 4);
memcpy(vdp, vp, 4);
}
}
}
+1
View File
@@ -122,6 +122,7 @@ extern "C"
int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
unsigned int screen_content_mode;
/* mode ->
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
+7 -6
View File
@@ -355,8 +355,8 @@ void vp8_deblock(VP8_COMMON *cm,
else
mb_ppl = (unsigned char)ppl;
vpx_memset(ylptr, mb_ppl, 16);
vpx_memset(uvlptr, mb_ppl, 8);
memset(ylptr, mb_ppl, 16);
memset(uvlptr, mb_ppl, 8);
ylptr += 16;
uvlptr += 8;
@@ -403,7 +403,7 @@ void vp8_de_noise(VP8_COMMON *cm,
(void) low_var_thresh;
(void) flag;
vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols);
memset(limits, (unsigned char)ppl, 16 * mb_cols);
/* TODO: The original code don't filter the 2 outer rows and columns. */
for (mbr = 0; mbr < mb_rows; mbr++)
@@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm,
}
}
double vp8_gaussian(double sigma, double mu, double x)
static double gaussian(double sigma, double mu, double x)
{
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
@@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
for (i = -32; i < 32; i++)
{
const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
const int v = (int)(.5 + 256 * gaussian(sigma, 0, i));
if (v)
{
@@ -518,6 +518,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
unsigned int Width, unsigned int Height, int Pitch)
{
unsigned int i, j;
(void)bothclamp;
for (i = 0; i < Height; i++)
{
@@ -762,7 +763,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
/* insure that postproc is set to all 0's so that post proc
* doesn't pull random data in from edge
*/
vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
}
}
+7 -50
View File
@@ -10,6 +10,8 @@
#include <limits.h>
#include <string.h>
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
@@ -30,31 +32,8 @@ void vp8_copy_mem16x16_c(
for (r = 0; r < 16; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
dst[8] = src[8];
dst[9] = src[9];
dst[10] = src[10];
dst[11] = src[11];
dst[12] = src[12];
dst[13] = src[13];
dst[14] = src[14];
dst[15] = src[15];
memcpy(dst, src, 16);
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
#endif
src += src_stride;
dst += dst_stride;
@@ -72,19 +51,8 @@ void vp8_copy_mem8x8_c(
for (r = 0; r < 8; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
@@ -102,19 +70,8 @@ void vp8_copy_mem8x4_c(
for (r = 0; r < 4; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
+11 -11
View File
@@ -70,10 +70,10 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
expected_dc = 128;
}
/*vpx_memset(ypred_ptr, expected_dc, 256);*/
/*memset(ypred_ptr, expected_dc, 256);*/
for (r = 0; r < 16; r++)
{
vpx_memset(ypred_ptr, expected_dc, 16);
memset(ypred_ptr, expected_dc, 16);
ypred_ptr += y_stride;
}
}
@@ -98,7 +98,7 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
for (r = 0; r < 16; r++)
{
vpx_memset(ypred_ptr, yleft_col[r], 16);
memset(ypred_ptr, yleft_col[r], 16);
ypred_ptr += y_stride;
}
@@ -202,12 +202,12 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
}
/*vpx_memset(upred_ptr,expected_udc,64);*/
/*vpx_memset(vpred_ptr,expected_vdc,64);*/
/*memset(upred_ptr,expected_udc,64);*/
/*memset(vpred_ptr,expected_vdc,64);*/
for (i = 0; i < 8; i++)
{
vpx_memset(upred_ptr, expected_udc, 8);
vpx_memset(vpred_ptr, expected_vdc, 8);
memset(upred_ptr, expected_udc, 8);
memset(vpred_ptr, expected_vdc, 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
@@ -217,8 +217,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
{
for (i = 0; i < 8; i++)
{
vpx_memcpy(upred_ptr, uabove_row, 8);
vpx_memcpy(vpred_ptr, vabove_row, 8);
memcpy(upred_ptr, uabove_row, 8);
memcpy(vpred_ptr, vabove_row, 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
@@ -229,8 +229,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
{
for (i = 0; i < 8; i++)
{
vpx_memset(upred_ptr, uleft_col[i], 8);
vpx_memset(vpred_ptr, vleft_col[i], 8);
memset(upred_ptr, uleft_col[i], 8);
memset(vpred_ptr, vleft_col[i], 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
+2 -4
View File
@@ -7,15 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "./vpx_config.h"
#define RTCD_C
#include "vp8_rtcd.h"
#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"
extern void vpx_scale_rtcd(void);
void vp8_rtcd()
{
vpx_scale_rtcd();
once(setup_rtcd_internal);
}
-302
View File
@@ -1,302 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include <stdlib.h>
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad, int m, int n)
{
int r, c;
unsigned int sad = 0;
for (r = 0; r < n; r++)
{
for (c = 0; c < m; c++)
{
sad += abs(src_ptr[c] - ref_ptr[c]);
}
if (sad > max_sad)
break;
src_ptr += src_stride;
ref_ptr += ref_stride;
}
return sad;
}
/* max_sad is provided as an optional optimization point. Alternative
* implementations of these functions are not required to check it.
*/
unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16);
}
unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8);
}
unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8);
}
unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16);
}
unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4);
}
void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
for (r = 0; r < height; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst_ptr[0] = src_ptr[0];
dst_ptr[1] = src_ptr[1];
dst_ptr[2] = src_ptr[2];
dst_ptr[3] = src_ptr[3];
dst_ptr[4] = src_ptr[4];
dst_ptr[5] = src_ptr[5];
dst_ptr[6] = src_ptr[6];
dst_ptr[7] = src_ptr[7];
dst_ptr[8] = src_ptr[8];
dst_ptr[9] = src_ptr[9];
dst_ptr[10] = src_ptr[10];
dst_ptr[11] = src_ptr[11];
dst_ptr[12] = src_ptr[12];
dst_ptr[13] = src_ptr[13];
dst_ptr[14] = src_ptr[14];
dst_ptr[15] = src_ptr[15];
dst_ptr[16] = src_ptr[16];
dst_ptr[17] = src_ptr[17];
dst_ptr[18] = src_ptr[18];
dst_ptr[19] = src_ptr[19];
dst_ptr[20] = src_ptr[20];
dst_ptr[21] = src_ptr[21];
dst_ptr[22] = src_ptr[22];
dst_ptr[23] = src_ptr[23];
dst_ptr[24] = src_ptr[24];
dst_ptr[25] = src_ptr[25];
dst_ptr[26] = src_ptr[26];
dst_ptr[27] = src_ptr[27];
dst_ptr[28] = src_ptr[28];
dst_ptr[29] = src_ptr[29];
dst_ptr[30] = src_ptr[30];
dst_ptr[31] = src_ptr[31];
#else
((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0] ;
((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1] ;
((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2] ;
((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3] ;
((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4] ;
((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5] ;
((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6] ;
((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7] ;
#endif
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
+6 -6
View File
@@ -17,15 +17,15 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
int i;
/* set up frame new frame for intra coded blocks */
vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
for (i = 0; i < ybf->y_height; i++)
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
@@ -33,7 +33,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
{
vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
}
+20 -51
View File
@@ -14,50 +14,42 @@
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef unsigned int(*vp8_sad_fn_t)(
const unsigned char *src_ptr,
typedef unsigned int(*vpx_sad_fn_t)(
const uint8_t *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride,
unsigned int max_sad);
const uint8_t *ref_ptr,
int ref_stride);
typedef void (*vp8_copy32xn_fn_t)(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
unsigned char *ref_ptr,
int ref_stride,
int n);
typedef void (*vp8_sad_multi_fn_t)(
typedef void (*vpx_sad_multi_fn_t)(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
const unsigned char *ref_array,
int ref_stride,
unsigned int *sad_array);
typedef void (*vp8_sad_multi1_fn_t)
typedef void (*vpx_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride,
unsigned short *sad_array
);
typedef void (*vp8_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char * const ref_ptr[],
const unsigned char * const ref_array[],
int ref_stride,
unsigned int *sad_array
);
typedef unsigned int (*vp8_variance_fn_t)
typedef unsigned int (*vpx_variance_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
@@ -77,40 +69,17 @@ typedef unsigned int (*vp8_subpixvariance_fn_t)
unsigned int *sse
);
typedef void (*vp8_ssimpf_fn_t)
(
unsigned char *s,
int sp,
unsigned char *r,
int rp,
unsigned long *sum_s,
unsigned long *sum_r,
unsigned long *sum_sq_s,
unsigned long *sum_sq_r,
unsigned long *sum_sxr
);
typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
typedef unsigned int (*vp8_get16x16prederror_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride
);
typedef struct variance_vtable
{
vp8_sad_fn_t sdf;
vp8_variance_fn_t vf;
vpx_sad_fn_t sdf;
vpx_variance_fn_t vf;
vp8_subpixvariance_fn_t svf;
vp8_variance_fn_t svf_halfpix_h;
vp8_variance_fn_t svf_halfpix_v;
vp8_variance_fn_t svf_halfpix_hv;
vp8_sad_multi_fn_t sdx3f;
vp8_sad_multi1_fn_t sdx8f;
vp8_sad_multi_d_fn_t sdx4df;
vpx_variance_fn_t svf_halfpix_h;
vpx_variance_fn_t svf_halfpix_v;
vpx_variance_fn_t svf_halfpix_hv;
vpx_sad_multi_fn_t sdx3f;
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
#if ARCH_X86 || ARCH_X86_64
vp8_copy32xn_fn_t copymem;
#endif
+23 -144
View File
@@ -8,43 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "variance.h"
#include "./vp8_rtcd.h"
#include "filter.h"
#include "variance.h"
unsigned int vp8_get_mb_ss_c
(
const short *src_ptr
)
{
unsigned int i = 0, sum = 0;
do
{
sum += (src_ptr[i] * src_ptr[i]);
i++;
}
while (i < 256);
return sum;
/* This is a bad idea.
* ctz = count trailing zeros */
static int ctz(int a) {
int b = 0;
while (a != 1) {
a >>= 1;
b++;
}
return b;
}
static void variance(
static unsigned int variance(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
int w,
int h,
unsigned int *sse,
int *sum)
unsigned int *sse)
{
int i, j;
int diff;
int diff, sum;
*sum = 0;
sum = 0;
*sse = 0;
for (i = 0; i < h; i++)
@@ -52,114 +43,17 @@ static void variance(
for (j = 0; j < w; j++)
{
diff = src_ptr[j] - ref_ptr[j];
*sum += diff;
sum += diff;
*sse += diff * diff;
}
src_ptr += source_stride;
ref_ptr += recon_stride;
}
return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h)))));
}
unsigned int vp8_variance16x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 8));
}
unsigned int vp8_variance8x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance16x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_variance4x4_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_mse16x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
*sse = var;
return var;
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass
@@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c
/* Now filter Verticaly */
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse);
}
@@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse);
}
unsigned int vp8_sub_pixel_variance16x16_c
@@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse);
}
@@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
}
unsigned int vp8_sub_pixel_mse16x16_c
(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_c
(
const unsigned char *src_ptr,
@@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse);
}
unsigned int vp8_sub_pixel_variance8x16_c
@@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse);
}
+93
View File
@@ -0,0 +1,93 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp8_copy32xn_sse2(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;dst_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;dst_stride
movsxd rcx, dword ptr arg(4) ;height
.block_copy_sse2_loopx4:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
movdqu xmm2, XMMWORD PTR [rsi + rax]
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqu xmm4, XMMWORD PTR [rsi]
movdqu xmm5, XMMWORD PTR [rsi + 16]
movdqu xmm6, XMMWORD PTR [rsi + rax]
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
movdqa XMMWORD PTR [rdi + rdx], xmm2
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
lea rdi, [rdi+rdx*2]
movdqa XMMWORD PTR [rdi], xmm4
movdqa XMMWORD PTR [rdi + 16], xmm5
movdqa XMMWORD PTR [rdi + rdx], xmm6
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
lea rdi, [rdi+rdx*2]
sub rcx, 4
cmp rcx, 4
jge .block_copy_sse2_loopx4
cmp rcx, 0
je .copy_is_done
.block_copy_sse2_loop:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
lea rsi, [rsi+rax]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
lea rdi, [rdi+rdx]
sub rcx, 1
jne .block_copy_sse2_loop
.copy_is_done:
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
+146
View File
@@ -0,0 +1,146 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define ref_ptr rdi
%define ref_stride rdx
%define end_ptr rcx
%define ret_var rbx
%define result_ptr arg(4)
%define max_sad arg(4)
%define height dword ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
mov rsi, arg(0) ; src_ptr
mov rdi, arg(2) ; ref_ptr
movsxd rax, dword ptr arg(1) ; src_stride
movsxd rdx, dword ptr arg(3) ; ref_stride
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define ref_ptr r8
%define ref_stride r9
%define end_ptr r10
%define ret_var r11
%define result_ptr [rsp+xmm_stack_space+8+4*8]
%define max_sad [rsp+xmm_stack_space+8+4*8]
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
%define src_stride rsi
%define ref_ptr rdx
%define ref_stride rcx
%define end_ptr r9
%define ret_var r10
%define result_ptr r8
%define max_sad r8
%define height r8
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X3 0
%define src_ptr
%define src_stride
%define ref_ptr
%define ref_stride
%define end_ptr
%define ret_var
%define result_ptr
%define max_sad
%define height
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
RESTORE_XMM
%endif
%endif
ret
%endmacro
;void vp8_copy32xn_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
.block_copy_sse3_loopx4:
lea end_ptr, [src_ptr+src_stride*2]
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
movdqu xmm4, XMMWORD PTR [end_ptr]
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
lea src_ptr, [src_ptr+src_stride*4]
lea end_ptr, [ref_ptr+ref_stride*2]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
movdqa XMMWORD PTR [end_ptr], xmm4
movdqa XMMWORD PTR [end_ptr + 16], xmm5
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
lea ref_ptr, [ref_ptr+ref_stride*4]
sub height, 4
cmp height, 4
jge .block_copy_sse3_loopx4
;Check to see if there is more rows need to be copied.
cmp height, 0
je .copy_is_done
.block_copy_sse3_loop:
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
lea src_ptr, [src_ptr+src_stride]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
lea ref_ptr, [ref_ptr+ref_stride]
sub height, 1
jne .block_copy_sse3_loop
.copy_is_done:
STACK_FRAME_DESTROY_X3
+8 -8
View File
@@ -36,7 +36,7 @@ void vp8_dequant_idct_add_y_block_mmx
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
@@ -45,7 +45,7 @@ void vp8_dequant_idct_add_y_block_mmx
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
dst+4, stride);
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
memset(q + 16, 0, 2 * sizeof(q[0]));
}
if (eobs[2] > 1)
@@ -54,7 +54,7 @@ void vp8_dequant_idct_add_y_block_mmx
{
vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
dst+8, stride);
vpx_memset(q + 32, 0, 2 * sizeof(q[0]));
memset(q + 32, 0, 2 * sizeof(q[0]));
}
if (eobs[3] > 1)
@@ -63,7 +63,7 @@ void vp8_dequant_idct_add_y_block_mmx
{
vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
dst+12, stride);
vpx_memset(q + 48, 0, 2 * sizeof(q[0]));
memset(q + 48, 0, 2 * sizeof(q[0]));
}
q += 64;
@@ -85,7 +85,7 @@ void vp8_dequant_idct_add_uv_block_mmx
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
@@ -94,7 +94,7 @@ void vp8_dequant_idct_add_uv_block_mmx
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
dstu+4, stride);
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
@@ -109,7 +109,7 @@ void vp8_dequant_idct_add_uv_block_mmx
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
vpx_memset(q, 0, 2 * sizeof(q[0]));
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
@@ -118,7 +118,7 @@ void vp8_dequant_idct_add_uv_block_mmx
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
dstv+4, stride);
vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
-410
View File
@@ -1,410 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_sad16x16_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM 6
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rax*8]
lea rcx, [rcx+rax*8]
pxor xmm6, xmm6
.x16x16sad_wmt_loop:
movq xmm0, QWORD PTR [rsi]
movq xmm2, QWORD PTR [rsi+8]
movq xmm1, QWORD PTR [rdi]
movq xmm3, QWORD PTR [rdi+8]
movq xmm4, QWORD PTR [rsi+rax]
movq xmm5, QWORD PTR [rdi+rdx]
punpcklbw xmm0, xmm2
punpcklbw xmm1, xmm3
psadbw xmm0, xmm1
movq xmm2, QWORD PTR [rsi+rax+8]
movq xmm3, QWORD PTR [rdi+rdx+8]
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
punpcklbw xmm4, xmm2
punpcklbw xmm5, xmm3
psadbw xmm4, xmm5
paddw xmm6, xmm0
paddw xmm6, xmm4
cmp rsi, rcx
jne .x16x16sad_wmt_loop
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movq rax, xmm0
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad8x16_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
lea rcx, [rcx+rbx*8]
pxor mm7, mm7
.x8x16sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x8x16sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
movq mm2, QWORD PTR [rsi+rbx]
movq mm3, QWORD PTR [rdi+rdx]
psadbw mm0, mm1
psadbw mm2, mm3
lea rsi, [rsi+rbx*2]
lea rdi, [rdi+rdx*2]
paddw mm7, mm0
paddw mm7, mm2
cmp rsi, rcx
jne .x8x16sad_wmt_loop
movq rax, mm7
.x8x16sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad8x8_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
pxor mm7, mm7
.x8x8sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x8x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
psadbw mm0, mm1
lea rsi, [rsi+rbx]
add rdi, rdx
paddw mm7, mm0
cmp rsi, rcx
jne .x8x8sad_wmt_loop
movq rax, mm7
.x8x8sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad4x4_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
movd mm0, DWORD PTR [rsi]
movd mm1, DWORD PTR [rdi]
movd mm2, DWORD PTR [rsi+rax]
movd mm3, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
psadbw mm0, mm1
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movd mm4, DWORD PTR [rsi]
movd mm5, DWORD PTR [rdi]
movd mm6, DWORD PTR [rsi+rax]
movd mm7, DWORD PTR [rdi+rdx]
punpcklbw mm4, mm6
punpcklbw mm5, mm7
psadbw mm4, mm5
paddw mm0, mm4
movq rax, mm0
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad16x8_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
pxor mm7, mm7
.x16x8sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x16x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm2, QWORD PTR [rsi+8]
movq mm1, QWORD PTR [rdi]
movq mm3, QWORD PTR [rdi+8]
movq mm4, QWORD PTR [rsi+rbx]
movq mm5, QWORD PTR [rdi+rdx]
psadbw mm0, mm1
psadbw mm2, mm3
movq mm1, QWORD PTR [rsi+rbx+8]
movq mm3, QWORD PTR [rdi+rdx+8]
psadbw mm4, mm5
psadbw mm1, mm3
lea rsi, [rsi+rbx*2]
lea rdi, [rdi+rdx*2]
paddw mm0, mm2
paddw mm4, mm1
paddw mm7, mm0
paddw mm7, mm4
cmp rsi, rcx
jne .x16x8sad_wmt_loop
movq rax, mm7
.x16x8sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;void vp8_copy32xn_sse2(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;dst_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;dst_stride
movsxd rcx, dword ptr arg(4) ;height
.block_copy_sse2_loopx4:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
movdqu xmm2, XMMWORD PTR [rsi + rax]
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqu xmm4, XMMWORD PTR [rsi]
movdqu xmm5, XMMWORD PTR [rsi + 16]
movdqu xmm6, XMMWORD PTR [rsi + rax]
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
movdqa XMMWORD PTR [rdi + rdx], xmm2
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
lea rdi, [rdi+rdx*2]
movdqa XMMWORD PTR [rdi], xmm4
movdqa XMMWORD PTR [rdi + 16], xmm5
movdqa XMMWORD PTR [rdi + rdx], xmm6
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
lea rdi, [rdi+rdx*2]
sub rcx, 4
cmp rcx, 4
jge .block_copy_sse2_loopx4
cmp rcx, 0
je .copy_is_done
.block_copy_sse2_loop:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
lea rsi, [rsi+rax]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
lea rdi, [rdi+rdx]
sub rcx, 1
jne .block_copy_sse2_loop
.copy_is_done:
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-960
View File
@@ -1,960 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define ref_ptr rdi
%define ref_stride rdx
%define end_ptr rcx
%define ret_var rbx
%define result_ptr arg(4)
%define max_sad arg(4)
%define height dword ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
mov rsi, arg(0) ; src_ptr
mov rdi, arg(2) ; ref_ptr
movsxd rax, dword ptr arg(1) ; src_stride
movsxd rdx, dword ptr arg(3) ; ref_stride
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define ref_ptr r8
%define ref_stride r9
%define end_ptr r10
%define ret_var r11
%define result_ptr [rsp+xmm_stack_space+8+4*8]
%define max_sad [rsp+xmm_stack_space+8+4*8]
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
%define src_stride rsi
%define ref_ptr rdx
%define ref_stride rcx
%define end_ptr r9
%define ret_var r10
%define result_ptr r8
%define max_sad r8
%define height r8
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X3 0
%define src_ptr
%define src_stride
%define ref_ptr
%define ref_stride
%define end_ptr
%define ret_var
%define result_ptr
%define max_sad
%define height
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
RESTORE_XMM
%endif
%endif
ret
%endmacro
%macro STACK_FRAME_CREATE_X4 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define r0_ptr rcx
%define r1_ptr rdx
%define r2_ptr rbx
%define r3_ptr rdi
%define ref_stride rbp
%define result_ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
push rbp
mov rdi, arg(2) ; ref_ptr_base
LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
mov rsi, arg(0) ; src_ptr
movsxd rbx, dword ptr arg(1) ; src_stride
movsxd rbp, dword ptr arg(3) ; ref_stride
xchg rbx, rax
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define r0_ptr rsi
%define r1_ptr r10
%define r2_ptr r11
%define r3_ptr r8
%define ref_stride r9
%define result_ptr [rsp+xmm_stack_space+16+4*8]
push rsi
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
%else
%define src_ptr rdi
%define src_stride rsi
%define r0_ptr r9
%define r1_ptr r10
%define r2_ptr r11
%define r3_ptr rdx
%define ref_stride rcx
%define result_ptr r8
LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X4 0
%define src_ptr
%define src_stride
%define r0_ptr
%define r1_ptr
%define r2_ptr
%define r3_ptr
%define ref_stride
%define result_ptr
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
pop rsi
RESTORE_XMM
%endif
%endif
ret
%endmacro
%macro PROCESS_16X2X3 5
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm5, XMMWORD PTR [%3]
lddqu xmm6, XMMWORD PTR [%3+1]
lddqu xmm7, XMMWORD PTR [%3+2]
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm1, XMMWORD PTR [%3]
lddqu xmm2, XMMWORD PTR [%3+1]
lddqu xmm3, XMMWORD PTR [%3+2]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm5, xmm1
paddw xmm6, xmm2
paddw xmm7, xmm3
%endif
movdqa xmm0, XMMWORD PTR [%2+%4]
lddqu xmm1, XMMWORD PTR [%3+%5]
lddqu xmm2, XMMWORD PTR [%3+%5+1]
lddqu xmm3, XMMWORD PTR [%3+%5+2]
%if %1==0 || %1==1
lea %2, [%2+%4*2]
lea %3, [%3+%5*2]
%endif
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm5, xmm1
paddw xmm6, xmm2
paddw xmm7, xmm3
%endmacro
%macro PROCESS_8X2X3 5
%if %1==0
movq mm0, QWORD PTR [%2]
movq mm5, QWORD PTR [%3]
movq mm6, QWORD PTR [%3+1]
movq mm7, QWORD PTR [%3+2]
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
movq mm0, QWORD PTR [%2]
movq mm1, QWORD PTR [%3]
movq mm2, QWORD PTR [%3+1]
movq mm3, QWORD PTR [%3+2]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm5, mm1
paddw mm6, mm2
paddw mm7, mm3
%endif
movq mm0, QWORD PTR [%2+%4]
movq mm1, QWORD PTR [%3+%5]
movq mm2, QWORD PTR [%3+%5+1]
movq mm3, QWORD PTR [%3+%5+2]
%if %1==0 || %1==1
lea %2, [%2+%4*2]
lea %3, [%3+%5*2]
%endif
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm5, mm1
paddw mm6, mm2
paddw mm7, mm3
%endmacro
%macro LOAD_X4_ADDRESSES 5
mov %2, [%1+REG_SZ_BYTES*0]
mov %3, [%1+REG_SZ_BYTES*1]
mov %4, [%1+REG_SZ_BYTES*2]
mov %5, [%1+REG_SZ_BYTES*3]
%endmacro
%macro PROCESS_16X2X4 8
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm4, XMMWORD PTR [%3]
lddqu xmm5, XMMWORD PTR [%4]
lddqu xmm6, XMMWORD PTR [%5]
lddqu xmm7, XMMWORD PTR [%6]
psadbw xmm4, xmm0
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm1, XMMWORD PTR [%3]
lddqu xmm2, XMMWORD PTR [%4]
lddqu xmm3, XMMWORD PTR [%5]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
lddqu xmm1, XMMWORD PTR [%6]
paddw xmm5, xmm2
paddw xmm6, xmm3
psadbw xmm1, xmm0
paddw xmm7, xmm1
%endif
movdqa xmm0, XMMWORD PTR [%2+%7]
lddqu xmm1, XMMWORD PTR [%3+%8]
lddqu xmm2, XMMWORD PTR [%4+%8]
lddqu xmm3, XMMWORD PTR [%5+%8]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
lddqu xmm1, XMMWORD PTR [%6+%8]
paddw xmm5, xmm2
paddw xmm6, xmm3
%if %1==0 || %1==1
lea %2, [%2+%7*2]
lea %3, [%3+%8*2]
lea %4, [%4+%8*2]
lea %5, [%5+%8*2]
lea %6, [%6+%8*2]
%endif
psadbw xmm1, xmm0
paddw xmm7, xmm1
%endmacro
%macro PROCESS_8X2X4 8
%if %1==0
movq mm0, QWORD PTR [%2]
movq mm4, QWORD PTR [%3]
movq mm5, QWORD PTR [%4]
movq mm6, QWORD PTR [%5]
movq mm7, QWORD PTR [%6]
psadbw mm4, mm0
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
movq mm0, QWORD PTR [%2]
movq mm1, QWORD PTR [%3]
movq mm2, QWORD PTR [%4]
movq mm3, QWORD PTR [%5]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm4, mm1
movq mm1, QWORD PTR [%6]
paddw mm5, mm2
paddw mm6, mm3
psadbw mm1, mm0
paddw mm7, mm1
%endif
movq mm0, QWORD PTR [%2+%7]
movq mm1, QWORD PTR [%3+%8]
movq mm2, QWORD PTR [%4+%8]
movq mm3, QWORD PTR [%5+%8]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm4, mm1
movq mm1, QWORD PTR [%6+%8]
paddw mm5, mm2
paddw mm6, mm3
%if %1==0 || %1==1
lea %2, [%2+%7*2]
lea %3, [%3+%8*2]
lea %4, [%4+%8*2]
lea %5, [%5+%8*2]
lea %6, [%6+%8*2]
%endif
psadbw mm1, mm0
paddw mm7, mm1
%endmacro
;void int vp8_sad16x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad16x16x3_sse3) PRIVATE
sym(vp8_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+4], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+8], xmm0
STACK_FRAME_DESTROY_X3
;void int vp8_sad16x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad16x8x3_sse3) PRIVATE
sym(vp8_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+4], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+8], xmm0
STACK_FRAME_DESTROY_X3
;void int vp8_sad8x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x16x3_sse3) PRIVATE
sym(vp8_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
punpckldq mm5, mm6
movq [rcx], mm5
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;void int vp8_sad8x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x8x3_sse3) PRIVATE
sym(vp8_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
punpckldq mm5, mm6
movq [rcx], mm5
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;void int vp8_sad4x4x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad4x4x3_sse3) PRIVATE
sym(vp8_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3
movd mm0, DWORD PTR [src_ptr]
movd mm1, DWORD PTR [ref_ptr]
movd mm2, DWORD PTR [src_ptr+src_stride]
movd mm3, DWORD PTR [ref_ptr+ref_stride]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
movd mm4, DWORD PTR [ref_ptr+1]
movd mm5, DWORD PTR [ref_ptr+2]
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
psadbw mm1, mm0
punpcklbw mm4, mm2
punpcklbw mm5, mm3
psadbw mm4, mm0
psadbw mm5, mm0
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
movd mm0, DWORD PTR [src_ptr]
movd mm2, DWORD PTR [ref_ptr]
movd mm3, DWORD PTR [src_ptr+src_stride]
movd mm6, DWORD PTR [ref_ptr+ref_stride]
punpcklbw mm0, mm3
punpcklbw mm2, mm6
movd mm3, DWORD PTR [ref_ptr+1]
movd mm7, DWORD PTR [ref_ptr+2]
psadbw mm2, mm0
paddw mm1, mm2
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
punpcklbw mm3, mm2
punpcklbw mm7, mm6
psadbw mm3, mm0
psadbw mm7, mm0
paddw mm3, mm4
paddw mm7, mm5
mov rcx, result_ptr
punpckldq mm1, mm3
movq [rcx], mm1
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;unsigned int vp8_sad16x16_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
;%define lddqu movdqu
global sym(vp8_sad16x16_sse3) PRIVATE
sym(vp8_sad16x16_sse3):
STACK_FRAME_CREATE_X3
mov end_ptr, 4
pxor xmm7, xmm7
.vp8_sad16x16_sse3_loop:
movdqa xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [ref_ptr]
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
movdqa xmm4, XMMWORD PTR [src_ptr]
movdqu xmm5, XMMWORD PTR [ref_ptr]
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
psadbw xmm0, xmm1
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
psadbw xmm2, xmm3
psadbw xmm4, xmm5
psadbw xmm6, xmm1
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
paddw xmm7, xmm0
paddw xmm7, xmm2
paddw xmm7, xmm4
paddw xmm7, xmm6
sub end_ptr, 1
jne .vp8_sad16x16_sse3_loop
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movq rax, xmm0
STACK_FRAME_DESTROY_X3
;void vp8_copy32xn_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
.block_copy_sse3_loopx4:
lea end_ptr, [src_ptr+src_stride*2]
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
movdqu xmm4, XMMWORD PTR [end_ptr]
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
lea src_ptr, [src_ptr+src_stride*4]
lea end_ptr, [ref_ptr+ref_stride*2]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
movdqa XMMWORD PTR [end_ptr], xmm4
movdqa XMMWORD PTR [end_ptr + 16], xmm5
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
lea ref_ptr, [ref_ptr+ref_stride*4]
sub height, 4
cmp height, 4
jge .block_copy_sse3_loopx4
;Check to see if there is more rows need to be copied.
cmp height, 0
je .copy_is_done
.block_copy_sse3_loop:
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
lea src_ptr, [src_ptr+src_stride]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
lea ref_ptr, [ref_ptr+ref_stride]
sub height, 1
jne .block_copy_sse3_loop
.copy_is_done:
STACK_FRAME_DESTROY_X3
;void vp8_sad16x16x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
global sym(vp8_sad16x16x4d_sse3) PRIVATE
sym(vp8_sad16x16x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
movq xmm0, xmm4
psrldq xmm4, 8
paddw xmm0, xmm4
movd [rcx], xmm0
;-
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx+4], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+8], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+12], xmm0
STACK_FRAME_DESTROY_X4
;void vp8_sad16x8x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
global sym(vp8_sad16x8x4d_sse3) PRIVATE
sym(vp8_sad16x8x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
movq xmm0, xmm4
psrldq xmm4, 8
paddw xmm0, xmm4
movd [rcx], xmm0
;-
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx+4], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+8], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+12], xmm0
STACK_FRAME_DESTROY_X4
;void int vp8_sad8x16x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x16x4d_sse3) PRIVATE
sym(vp8_sad8x16x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
punpckldq mm4, mm5
punpckldq mm6, mm7
movq [rcx], mm4
movq [rcx+8], mm6
STACK_FRAME_DESTROY_X4
;void int vp8_sad8x8x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x8x4d_sse3) PRIVATE
sym(vp8_sad8x8x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
punpckldq mm4, mm5
punpckldq mm6, mm7
movq [rcx], mm4
movq [rcx+8], mm6
STACK_FRAME_DESTROY_X4
;void int vp8_sad4x4x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad4x4x4d_sse3) PRIVATE
sym(vp8_sad4x4x4d_sse3):
STACK_FRAME_CREATE_X4
movd mm0, DWORD PTR [src_ptr]
movd mm1, DWORD PTR [r0_ptr]
movd mm2, DWORD PTR [src_ptr+src_stride]
movd mm3, DWORD PTR [r0_ptr+ref_stride]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
movd mm4, DWORD PTR [r1_ptr]
movd mm5, DWORD PTR [r2_ptr]
movd mm6, DWORD PTR [r3_ptr]
movd mm2, DWORD PTR [r1_ptr+ref_stride]
movd mm3, DWORD PTR [r2_ptr+ref_stride]
movd mm7, DWORD PTR [r3_ptr+ref_stride]
psadbw mm1, mm0
punpcklbw mm4, mm2
punpcklbw mm5, mm3
punpcklbw mm6, mm7
psadbw mm4, mm0
psadbw mm5, mm0
psadbw mm6, mm0
lea src_ptr, [src_ptr+src_stride*2]
lea r0_ptr, [r0_ptr+ref_stride*2]
lea r1_ptr, [r1_ptr+ref_stride*2]
lea r2_ptr, [r2_ptr+ref_stride*2]
lea r3_ptr, [r3_ptr+ref_stride*2]
movd mm0, DWORD PTR [src_ptr]
movd mm2, DWORD PTR [r0_ptr]
movd mm3, DWORD PTR [src_ptr+src_stride]
movd mm7, DWORD PTR [r0_ptr+ref_stride]
punpcklbw mm0, mm3
punpcklbw mm2, mm7
movd mm3, DWORD PTR [r1_ptr]
movd mm7, DWORD PTR [r2_ptr]
psadbw mm2, mm0
%if ABI_IS_32BIT
mov rax, rbp
pop rbp
%define ref_stride rax
%endif
mov rsi, result_ptr
paddw mm1, mm2
movd [rsi], mm1
movd mm2, DWORD PTR [r1_ptr+ref_stride]
movd mm1, DWORD PTR [r2_ptr+ref_stride]
punpcklbw mm3, mm2
punpcklbw mm7, mm1
psadbw mm3, mm0
psadbw mm7, mm0
movd mm2, DWORD PTR [r3_ptr]
movd mm1, DWORD PTR [r3_ptr+ref_stride]
paddw mm3, mm4
paddw mm7, mm5
movd [rsi+4], mm3
punpcklbw mm2, mm1
movd [rsi+8], mm7
psadbw mm2, mm0
paddw mm2, mm6
movd [rsi+12], mm2
STACK_FRAME_DESTROY_X4
-353
View File
@@ -1,353 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro PROCESS_16X2X8 1
%if %1
movdqa xmm0, XMMWORD PTR [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm1, xmm3
punpcklqdq xmm3, xmm2
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm1, xmm2
paddw xmm1, xmm3
paddw xmm1, xmm4
%else
movdqa xmm0, XMMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5
%endif
movdqa xmm0, XMMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
movq xmm2, MMWORD PTR [rdi+ rdx+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5
%endmacro
%macro PROCESS_8X2X8 1
%if %1
movq xmm0, MMWORD PTR [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm1, xmm2
%else
movq xmm0, MMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5
%endif
movq xmm0, MMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5
%endmacro
%macro PROCESS_4X2X8 1
%if %1
movd xmm0, [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3
mpsadbw xmm1, xmm0, 0x0
%else
movd xmm0, [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5
%endif
movd xmm0, [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5
%endmacro
;void vp8_sad16x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
global sym(vp8_sad16x16x8_sse4) PRIVATE
sym(vp8_sad16x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad16x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad16x8x8_sse4) PRIVATE
sym(vp8_sad16x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad8x8x8_sse4) PRIVATE
sym(vp8_sad8x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad8x16x8_sse4) PRIVATE
sym(vp8_sad8x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad4x4x8_c(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad4x4x8_sse4) PRIVATE
sym(vp8_sad4x4x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_4X2X8 1
PROCESS_4X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
@@ -13,393 +13,6 @@
%define xmm_filter_shift 7
;unsigned int vp8_get_mb_ss_sse2
;(
; short *src_ptr
;)
global sym(vp8_get_mb_ss_sse2) PRIVATE
sym(vp8_get_mb_ss_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 1
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rax, arg(0) ;[src_ptr]
mov rcx, 8
pxor xmm4, xmm4
.NEXTROW:
movdqa xmm0, [rax]
movdqa xmm1, [rax+16]
movdqa xmm2, [rax+32]
movdqa xmm3, [rax+48]
pmaddwd xmm0, xmm0
pmaddwd xmm1, xmm1
pmaddwd xmm2, xmm2
pmaddwd xmm3, xmm3
paddd xmm0, xmm1
paddd xmm2, xmm3
paddd xmm4, xmm0
paddd xmm4, xmm2
add rax, 0x40
dec rcx
ja .NEXTROW
movdqa xmm3,xmm4
psrldq xmm4,8
paddd xmm4,xmm3
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
movq rax,xmm4
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_get16x16var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
; unsigned char * ref_ptr,
; int recon_stride,
; unsigned int * SSE,
; int * Sum
;)
global sym(vp8_get16x16var_sse2) PRIVATE
sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
; Prefetch data
lea rcx, [rax+rax*2]
prefetcht0 [rsi]
prefetcht0 [rsi+rax]
prefetcht0 [rsi+rax*2]
prefetcht0 [rsi+rcx]
lea rbx, [rsi+rax*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rax]
prefetcht0 [rbx+rax*2]
prefetcht0 [rbx+rcx]
lea rcx, [rdx+rdx*2]
prefetcht0 [rdi]
prefetcht0 [rdi+rdx]
prefetcht0 [rdi+rdx*2]
prefetcht0 [rdi+rcx]
lea rbx, [rdi+rdx*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rdx]
prefetcht0 [rbx+rdx*2]
prefetcht0 [rbx+rcx]
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
mov rcx, 16
.var16loop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
prefetcht0 [rsi+rax*8]
prefetcht0 [rdi+rdx*8]
movdqa xmm3, xmm1
movdqa xmm4, xmm2
punpcklbw xmm1, xmm0
punpckhbw xmm3, xmm0
punpcklbw xmm2, xmm0
punpckhbw xmm4, xmm0
psubw xmm1, xmm2
psubw xmm3, xmm4
paddw xmm7, xmm1
pmaddwd xmm1, xmm1
paddw xmm7, xmm3
pmaddwd xmm3, xmm3
paddd xmm6, xmm1
paddd xmm6, xmm3
add rsi, rax
add rdi, rdx
sub rcx, 1
jnz .var16loop
movdqa xmm1, xmm6
pxor xmm6, xmm6
pxor xmm5, xmm5
punpcklwd xmm6, xmm7
punpckhwd xmm5, xmm7
psrad xmm5, 16
psrad xmm6, 16
paddd xmm6, xmm5
movdqa xmm2, xmm1
punpckldq xmm1, xmm0
punpckhdq xmm2, xmm0
movdqa xmm7, xmm6
paddd xmm1, xmm2
punpckldq xmm6, xmm0
punpckhdq xmm7, xmm0
paddd xmm6, xmm7
movdqa xmm2, xmm1
movdqa xmm7, xmm6
psrldq xmm1, 8
psrldq xmm6, 8
paddd xmm7, xmm6
paddd xmm1, xmm2
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
movd DWORD PTR [rax], xmm7
movd DWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
pop rbx
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_get8x8var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
; unsigned char * ref_ptr,
; int recon_stride,
; unsigned int * SSE,
; int * Sum
;)
global sym(vp8_get8x8var_sse2) PRIVATE
sym(vp8_get8x8var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
movq xmm1, QWORD PTR [rsi]
movq xmm2, QWORD PTR [rdi]
punpcklbw xmm1, xmm0
punpcklbw xmm2, xmm0
psubsw xmm1, xmm2
paddw xmm7, xmm1
pmaddwd xmm1, xmm1
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax * 2]
movq xmm3, QWORD PTR[rdi + rdx * 2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax *2]
movq xmm3, QWORD PTR[rdi + rdx *2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax *2]
movq xmm3, QWORD PTR[rdi + rdx *2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movdqa xmm6, xmm7
punpcklwd xmm6, xmm0
punpckhwd xmm7, xmm0
movdqa xmm2, xmm1
paddw xmm6, xmm7
punpckldq xmm1, xmm0
punpckhdq xmm2, xmm0
movdqa xmm7, xmm6
paddd xmm1, xmm2
punpckldq xmm6, xmm0
punpckhdq xmm7, xmm0
paddw xmm6, xmm7
movdqa xmm2, xmm1
movdqa xmm7, xmm6
psrldq xmm1, 8
psrldq xmm6, 8
paddw xmm7, xmm6
paddd xmm1, xmm2
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx
movd DWORD PTR [rdi], xmm1
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block2d_bil_var_sse2
;(
; unsigned char *ref_ptr,
+1 -9
View File
@@ -8,19 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
extern unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern void vp8_half_horiz_vert_variance16x_h_sse2
(
const unsigned char *ref_ptr,
+11 -11
View File
@@ -127,7 +127,7 @@ void vp8_sixtap_predict4x4_mmx
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
@@ -148,7 +148,7 @@ void vp8_sixtap_predict16x16_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
@@ -180,7 +180,7 @@ void vp8_sixtap_predict8x8_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
@@ -206,7 +206,7 @@ void vp8_sixtap_predict8x4_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
@@ -252,7 +252,7 @@ void vp8_sixtap_predict16x16_sse2
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
@@ -292,7 +292,7 @@ void vp8_sixtap_predict8x8_sse2
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
@@ -330,7 +330,7 @@ void vp8_sixtap_predict8x4_sse2
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
@@ -432,7 +432,7 @@ void vp8_sixtap_predict16x16_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
if (xoffset)
{
@@ -480,7 +480,7 @@ void vp8_sixtap_predict8x8_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
if (xoffset)
{
@@ -528,7 +528,7 @@ void vp8_sixtap_predict8x4_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
if (xoffset)
{
@@ -576,7 +576,7 @@ void vp8_sixtap_predict4x4_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
if (xoffset)
{
@@ -0,0 +1,353 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%define mmx_filter_shift 7
;void vp8_filter_block2d_bil4x4_var_mmx
;(
; unsigned char *ref_ptr,
; int ref_pixels_per_line,
; unsigned char *src_ptr,
; int src_pixels_per_line,
; unsigned short *HFilter,
; unsigned short *VFilter,
; int *sum,
; unsigned int *sumsquared
;)
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil4x4_var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
pxor mm6, mm6 ;
pxor mm7, mm7 ;
mov rax, arg(4) ;HFilter ;
mov rdx, arg(5) ;VFilter ;
mov rsi, arg(0) ;ref_ptr ;
mov rdi, arg(2) ;src_ptr ;
mov rcx, 4 ;
pxor mm0, mm0 ;
movd mm1, [rsi] ;
movd mm3, [rsi+1] ;
punpcklbw mm1, mm0 ;
pmullw mm1, [rax] ;
punpcklbw mm3, mm0 ;
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm5, mm1
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
add rsi, r8
%endif
.filter_block2d_bil4x4_var_mmx_loop:
movd mm1, [rsi] ;
movd mm3, [rsi+1] ;
punpcklbw mm1, mm0 ;
pmullw mm1, [rax] ;
punpcklbw mm3, mm0 ;
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm3, mm5 ;
movq mm5, mm1 ;
pmullw mm3, [rdx] ;
pmullw mm1, [rdx+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movd mm3, [rdi] ;
punpcklbw mm3, mm0 ;
psubw mm1, mm3 ;
paddw mm6, mm1 ;
pmaddwd mm1, mm1 ;
paddd mm7, mm1 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
add rsi, r8
add rdi, r9
%endif
sub rcx, 1 ;
jnz .filter_block2d_bil4x4_var_mmx_loop ;
pxor mm3, mm3 ;
pxor mm2, mm2 ;
punpcklwd mm2, mm6 ;
punpckhwd mm3, mm6 ;
paddd mm2, mm3 ;
movq mm6, mm2 ;
psrlq mm6, 32 ;
paddd mm2, mm6 ;
psrad mm2, 16 ;
movq mm4, mm7 ;
psrlq mm4, 32 ;
paddd mm4, mm7 ;
mov rdi, arg(6) ;sum
mov rsi, arg(7) ;sumsquared
movd dword ptr [rdi], mm2 ;
movd dword ptr [rsi], mm4 ;
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block2d_bil_var_mmx
;(
; unsigned char *ref_ptr,
; int ref_pixels_per_line,
; unsigned char *src_ptr,
; int src_pixels_per_line,
; unsigned int Height,
; unsigned short *HFilter,
; unsigned short *VFilter,
; int *sum,
; unsigned int *sumsquared
;)
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil_var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
pxor mm6, mm6 ;
pxor mm7, mm7 ;
mov rax, arg(5) ;HFilter ;
mov rdx, arg(6) ;VFilter ;
mov rsi, arg(0) ;ref_ptr ;
mov rdi, arg(2) ;src_ptr ;
movsxd rcx, dword ptr arg(4) ;Height ;
pxor mm0, mm0 ;
movq mm1, [rsi] ;
movq mm3, [rsi+1] ;
movq mm2, mm1 ;
movq mm4, mm3 ;
punpcklbw mm1, mm0 ;
punpckhbw mm2, mm0 ;
pmullw mm1, [rax] ;
pmullw mm2, [rax] ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
pmullw mm3, [rax+8] ;
pmullw mm4, [rax+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm5, mm1
packuswb mm5, mm2 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
add rsi, r8
%endif
.filter_block2d_bil_var_mmx_loop:
movq mm1, [rsi] ;
movq mm3, [rsi+1] ;
movq mm2, mm1 ;
movq mm4, mm3 ;
punpcklbw mm1, mm0 ;
punpckhbw mm2, mm0 ;
pmullw mm1, [rax] ;
pmullw mm2, [rax] ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
pmullw mm3, [rax+8] ;
pmullw mm4, [rax+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm3, mm5 ;
movq mm4, mm5 ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
movq mm5, mm1 ;
packuswb mm5, mm2 ;
pmullw mm3, [rdx] ;
pmullw mm4, [rdx] ;
pmullw mm1, [rdx+8] ;
pmullw mm2, [rdx+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
psraw mm2, mmx_filter_shift ;
movq mm3, [rdi] ;
movq mm4, mm3 ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
psubw mm1, mm3 ;
psubw mm2, mm4 ;
paddw mm6, mm1 ;
pmaddwd mm1, mm1 ;
paddw mm6, mm2 ;
pmaddwd mm2, mm2 ;
paddd mm7, mm1 ;
paddd mm7, mm2 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
add rsi, r8
add rdi, r9
%endif
sub rcx, 1 ;
jnz .filter_block2d_bil_var_mmx_loop ;
pxor mm3, mm3 ;
pxor mm2, mm2 ;
punpcklwd mm2, mm6 ;
punpckhwd mm3, mm6 ;
paddd mm2, mm3 ;
movq mm6, mm2 ;
psrlq mm6, 32 ;
paddd mm2, mm6 ;
psrad mm2, 16 ;
movq mm4, mm7 ;
psrlq mm4, 32 ;
paddd mm4, mm7 ;
mov rdi, arg(7) ;sum
mov rsi, arg(8) ;sumsquared
movd dword ptr [rdi], mm2 ;
movd dword ptr [rsi], mm4 ;
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
align 16
mmx_bi_rd:
times 4 dw 64
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx
short *filter
);
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
const unsigned char *ref_ptr,
@@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx
unsigned int *sumsquared
);
unsigned int vp8_variance4x4_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_variance8x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_mse16x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, sse2, sse3, var;
int sum0, sum1, sum2, sum3;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
*sse = var;
return var;
}
unsigned int vp8_variance16x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, sse2, sse3, var;
int sum0, sum1, sum2, sum3, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
avg = sum0 + sum1 + sum2 + sum3;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 8));
}
unsigned int vp8_variance16x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
@@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
}
unsigned int vp8_sub_pixel_mse16x16_mmx(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_mmx
(
const unsigned char *src_ptr,
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
unsigned int *sumsquared
);
extern unsigned int vp8_get4x4var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
unsigned int vp8_get_mb_ss_sse2
(
const short *src_ptr
);
unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
unsigned int vp8_get8x8var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
void vp8_filter_block2d_bil_var_sse2
(
const unsigned char *ref_ptr,
@@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_variance8x8_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_variance16x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0;
int sum0;
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
*sse = sse0;
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
unsigned int vp8_mse16x16_wmt(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0;
int sum0;
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
*sse = sse0;
return sse0;
}
unsigned int vp8_variance16x8_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_wmt
(
const unsigned char *src_ptr,
@@ -378,20 +238,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
unsigned int vp8_sub_pixel_mse16x16_wmt(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_wmt
(
const unsigned char *src_ptr,
+34 -33
View File
@@ -73,8 +73,8 @@ void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
else
{
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
else
QIndex = pc->base_qindex;
@@ -101,6 +101,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
int i;
#if CONFIG_ERROR_CONCEALMENT
int corruption_detected = 0;
#else
(void)mb_idx;
#endif
if (xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -140,7 +142,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
* Better to use the predictor as reconstruction.
*/
pbi->frame_corrupt_residual = 1;
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
vp8_conceal_corrupt_mb(xd);
@@ -149,7 +151,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* force idct to be skipped for B_PRED and use the
* prediction only for reconstruction
* */
vpx_memset(xd->eobs, 0, 25);
memset(xd->eobs, 0, 25);
}
}
#endif
@@ -182,7 +184,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
vpx_memset(xd->eobs, 0, 25);
memset(xd->eobs, 0, 25);
intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
@@ -212,7 +214,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
(b->qcoeff[0] * DQC[0],
dst, dst_stride,
dst, dst_stride);
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
}
}
}
@@ -249,14 +251,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
vp8_short_inv_walsh4x4(&b->dqcoeff[0],
xd->qcoeff);
vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
}
else
{
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
xd->qcoeff);
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
}
/* override the dc dequant constant in order to preserve the
@@ -321,7 +323,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)Border; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
memcpy(dest_ptr1, src_ptr1, plane_stride);
dest_ptr1 += plane_stride;
}
@@ -336,7 +338,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
memcpy(dest_ptr1, src_ptr1, plane_stride);
dest_ptr1 += plane_stride;
}
@@ -349,7 +351,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
memcpy(dest_ptr1, src_ptr1, plane_stride);
dest_ptr1 += plane_stride;
}
}
@@ -377,7 +379,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)Border; i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr2 += plane_stride;
}
@@ -395,7 +397,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr2 += plane_stride;
}
@@ -409,7 +411,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr2 += plane_stride;
}
}
@@ -444,8 +446,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
memset(dest_ptr1, src_ptr1[0], Border);
memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
@@ -468,8 +470,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
memset(dest_ptr1, src_ptr1[0], Border);
memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
@@ -488,8 +490,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
memset(dest_ptr1, src_ptr1[0], Border);
memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
@@ -566,7 +568,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
/* reset contexts */
xd->above_context = pc->above_context;
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
xd->left_available = 0;
@@ -916,19 +918,19 @@ static void init_frame(VP8D_COMP *pbi)
if (pc->frame_type == KEY_FRAME)
{
/* Various keyframe initializations */
vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
vp8_init_mbmode_probs(pc);
vp8_default_coef_probs(pc);
/* reset the segment feature data to 0 with delta coding (Default state). */
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
/* reset the mode ref deltasa for loop filter */
vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
/* All buffers are implicitly updated on key frames. */
pc->refresh_golden_frame = 1;
@@ -1067,12 +1069,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
pc->vert_scale = clear[6] >> 6;
}
data += 7;
clear += 7;
}
else
{
vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
}
}
if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME))
@@ -1104,7 +1105,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
{
xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc);
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
/* For each segmentation feature (Quant and loop filter level) */
for (i = 0; i < MB_LVL_MAX; i++)
@@ -1128,7 +1129,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (xd->update_mb_segmentation_map)
{
/* Which macro block level features are enabled */
vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
/* Read the probs used to decode the segment id for each macro block. */
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
@@ -1277,7 +1278,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
#endif
if (pc->refresh_entropy_probs == 0)
{
vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
}
pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc);
@@ -1326,7 +1327,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
/* clear out the coeff buffer */
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
vp8_decode_mode_mvs(pbi);
@@ -1340,7 +1341,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
#endif
vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
pbi->frame_corrupt_residual = 0;
#if CONFIG_MULTITHREAD
@@ -1379,7 +1380,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (pc->refresh_entropy_probs == 0)
{
vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
pbi->independent_partitions = prev_independent_partitions;
}
+2
View File
@@ -591,6 +591,8 @@ static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi,
MB_MODE_INFO *mbmi)
{
(void)mbmi;
/* Read the Macroblock segmentation map if it is being updated explicitly
* this frame (reset to 0 above by default)
* By default on a key frame reset all MBs to segment 0
+2 -2
View File
@@ -20,8 +20,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
vpx_memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
vpx_memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
/* Clear entropy contexts for Y2 blocks */
if (!x->mode_info_context->mbmi.is_4x4)
+1 -1
View File
@@ -350,7 +350,7 @@ static void estimate_missing_mvs(MB_OVERLAP *overlaps,
unsigned int first_corrupt)
{
int mb_row, mb_col;
vpx_memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
/* First calculate the overlaps for all blocks */
for (mb_row = 0; mb_row < mb_rows; ++mb_row)
{
+6 -2
View File
@@ -58,7 +58,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
if (!pbi)
return NULL;
vpx_memset(pbi, 0, sizeof(VP8D_COMP));
memset(pbi, 0, sizeof(VP8D_COMP));
if (setjmp(pbi->common.error.jmp))
{
@@ -87,6 +87,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
pbi->ec_enabled = oxcf->error_concealment;
pbi->overlaps = NULL;
#else
(void)oxcf;
pbi->ec_enabled = 0;
#endif
/* Error concealment is activated after a key frame has been
@@ -258,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
return err;
}
int check_fragments_for_errors(VP8D_COMP *pbi)
static int check_fragments_for_errors(VP8D_COMP *pbi)
{
if (!pbi->ec_active &&
pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)
@@ -303,6 +304,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
{
VP8_COMMON *cm = &pbi->common;
int retcode = -1;
(void)size;
(void)source;
pbi->common.error.error_code = VPX_CODEC_OK;
@@ -407,6 +410,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
#else
(void)flags;
if (pbi->common.frame_to_show)
{
+28 -26
View File
@@ -60,12 +60,12 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->segmentation_enabled = xd->segmentation_enabled;
mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
/*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
/*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
/*unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;*/
mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
@@ -73,10 +73,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->current_bc = &pbi->mbc[0];
vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
mbd->fullpixel_mask = 0xffffffff;
@@ -96,6 +96,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
int i;
#if CONFIG_ERROR_CONCEALMENT
int corruption_detected = 0;
#else
(void)mb_idx;
#endif
if (xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -135,7 +137,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
* Better to use the predictor as reconstruction.
*/
pbi->frame_corrupt_residual = 1;
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
vp8_conceal_corrupt_mb(xd);
@@ -144,7 +146,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* force idct to be skipped for B_PRED and use the
* prediction only for reconstruction
* */
vpx_memset(xd->eobs, 0, 25);
memset(xd->eobs, 0, 25);
}
}
#endif
@@ -177,7 +179,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
vpx_memset(xd->eobs, 0, 25);
memset(xd->eobs, 0, 25);
intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
@@ -227,7 +229,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
dst, dst_stride, dst, dst_stride);
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
}
}
}
@@ -264,14 +266,14 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
vp8_short_inv_walsh4x4(&b->dqcoeff[0],
xd->qcoeff);
vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
}
else
{
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
xd->qcoeff);
vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
}
/* override the dc dequant constant in order to preserve the
@@ -358,7 +360,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
/* reset contexts */
xd->above_context = pc->above_context;
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
xd->left_available = 0;
@@ -497,9 +499,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
if( mb_row != pc->mb_rows-1 )
{
/* Save decoded MB last row data for next-row decoding */
vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
}
/* save left_col for next MB decoding */
@@ -874,23 +876,23 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
if (filter_level)
{
/* Set above_row buffer to 127 for decoding first MB row */
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
for (j=1; j<pc->mb_rows; j++)
{
vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
}
/* Set left_col to 129 initially */
for (j=0; j<pc->mb_rows; j++)
{
vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
}
/* Initialize the loop filter for this frame. */
@@ -1,310 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_start_encode|
EXPORT |vp8_encode_bool|
EXPORT |vp8_stop_encode|
EXPORT |vp8_encode_value|
IMPORT |vp8_validate_buffer_arm|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; needs vp8_writer in r0
; start shall not be in r1
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end]
ldr r3, [r0, #vp8_writer_error]
mov r1, $pos
mov r0, $start
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr}
MEND
; r0 BOOL_CODER *br
; r1 unsigned char *source
; r2 unsigned char *source_end
|vp8_start_encode| PROC
str r2, [r0, #vp8_writer_buffer_end]
mov r12, #0
mov r3, #255
mvn r2, #23
str r12, [r0, #vp8_writer_lowvalue]
str r3, [r0, #vp8_writer_range]
str r2, [r0, #vp8_writer_count]
str r12, [r0, #vp8_writer_pos]
str r1, [r0, #vp8_writer_buffer]
bx lr
ENDP
; r0 BOOL_CODER *br
; r1 int bit
; r2 int probability
|vp8_encode_bool| PROC
push {r4-r10, lr}
mov r4, r2
ldr r2, [r0, #vp8_writer_lowvalue]
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
sub r7, r5, #1 ; range-1
cmp r1, #0
mul r6, r4, r7 ; ((range-1) * probability)
mov r7, #1
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
addne r2, r2, r4 ; if (bit) lowvalue += split
subne r4, r5, r4 ; if (bit) range = range-split
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start
token_zero_while_loop
mov r9, #0
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r1, [r7, r4]
cmpge r1, #0xff
beq token_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r9, [r7, r4] ; w->buffer[x]
add r9, r9, #1
strb r9, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r9, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r1, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r1 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
pop {r4-r10, pc}
ENDP
; r0 BOOL_CODER *br
|vp8_stop_encode| PROC
push {r4-r10, lr}
ldr r2, [r0, #vp8_writer_lowvalue]
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
mov r10, #32
stop_encode_loop
sub r7, r5, #1 ; range-1
mov r4, r7, lsl #7 ; ((range-1) * 128)
mov r7, #1
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero_se ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set_se
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start_se
token_zero_while_loop_se
mov r9, #0
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start_se
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r1, [r7, r4]
cmpge r1, #0xff
beq token_zero_while_loop_se
ldr r7, [r0, #vp8_writer_buffer]
ldrb r9, [r7, r4] ; w->buffer[x]
add r9, r9, #1
strb r9, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set_se
rsb r4, r6, #24 ; 24-offset
ldr r9, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r1, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r1 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
lsl r2, r2, r6 ; lowvalue <<= shift
subs r10, r10, #1
bne stop_encode_loop
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
pop {r4-r10, pc}
ENDP
; r0 BOOL_CODER *br
; r1 int data
; r2 int bits
|vp8_encode_value| PROC
push {r4-r12, lr}
mov r10, r2
ldr r2, [r0, #vp8_writer_lowvalue]
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
rsb r4, r10, #32 ; 32-n
; v is kept in r1 during the token pack loop
lsl r1, r1, r4 ; r1 = v << 32 - n
encode_value_loop
sub r7, r5, #1 ; range-1
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
lsls r1, r1, #1 ; bit = v >> n
mov r4, r7, lsl #7 ; ((range-1) * 128)
mov r7, #1
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
addcs r2, r2, r4 ; if (bit) lowvalue += split
subcs r4, r5, r4 ; if (bit) range = range-split
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero_ev ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set_ev
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start_ev
token_zero_while_loop_ev
mov r9, #0
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start_ev
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq token_zero_while_loop_ev
ldr r7, [r0, #vp8_writer_buffer]
ldrb r9, [r7, r4] ; w->buffer[x]
add r9, r9, #1
strb r9, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set_ev
rsb r4, r6, #24 ; 24-offset
ldr r9, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r11 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_ev
lsl r2, r2, r6 ; lowvalue <<= shift
subs r10, r10, #1
bne encode_value_loop
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
pop {r4-r12, pc}
ENDP
END
@@ -1,317 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8cx_pack_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; needs vp8_writer in r0
; start shall not be in r1
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end]
ldr r3, [r0, #vp8_writer_error]
mov r1, $pos
mov r0, $start
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr}
MEND
; r0 vp8_writer *w
; r1 const TOKENEXTRA *p
; r2 int xcount
; r3 vp8_coef_encodings
; s0 vp8_extra_bits
; s1 vp8_coef_tree
|vp8cx_pack_tokens_armv5| PROC
push {r4-r12, lr}
sub sp, sp, #16
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
; sizeof (TOKENEXTRA) is 8
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
str r2, [sp, #0]
str r3, [sp, #8] ; save vp8_coef_encodings
ldr r2, [r0, #vp8_writer_lowvalue]
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
b check_p_lt_stop
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #8] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
; vp8 specific skip_eob_node
cmp r7, #0
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #60] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
sub r7, r5, #1 ; range-1
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
lsls r12, r12, #1 ; bb = v >> n
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
; if bb == 1, otherwise it will act like i + 0
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start
token_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq token_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4] ; w->buffer[x]
add r10, r10, #1
strb r10, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
ldr r10, [sp, #60] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #56] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
beq skip_extra_bits
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits
ldr r9, [r12, #vp8_extra_bit_struct_prob]
asr r7, lr, #1 ; v=e>>1
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
rsb r4, r8, #32
lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
lsls r12, r12, #1 ; v >> n
mul r6, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
clz r6, r4
sub r6, r6, #24
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi extra_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset= shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl extra_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos - 1
b extra_zero_while_start
extra_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
extra_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq extra_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4]
add r10, r10, #1
strb r10, [r7, r4]
extra_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos]
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
lsl r2, r2, r6
subs r8, r8, #1 ; --n
bne extra_bits_loop ; while (n)
no_extra_bits
ldr lr, [r1, #4] ; e = p->Extra
add r4, r5, #1 ; range + 1
tst lr, #1
lsr r4, r4, #1 ; split = (range + 1) >> 1
addne r2, r2, r4 ; lowvalue += split
subne r4, r5, r4 ; range = range-split
tst r2, #0x80000000 ; lowvalue & 0x80000000
lsl r5, r4, #1 ; range <<= 1
beq end_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos]
mov r7, #0
sub r4, r4, #1
b end_zero_while_start
end_zero_while_loop
strb r7, [r6, r4]
sub r4, r4, #1 ; x--
end_zero_while_start
cmp r4, #0
ldrge r6, [r0, #vp8_writer_buffer]
ldrb r12, [r6, r4]
cmpge r12, #0xff
beq end_zero_while_loop
ldr r6, [r0, #vp8_writer_buffer]
ldrb r7, [r6, r4]
add r7, r7, #1
strb r7, [r6, r4]
end_high_bit_not_set
adds r3, r3, #1 ; ++count
lsl r2, r2, #1 ; lowvalue <<= 1
bne end_count_zero
ldr r4, [r0, #vp8_writer_pos]
mvn r3, #7
ldr r7, [r0, #vp8_writer_buffer]
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
add r1, r1, #TOKENEXTRA_SZ ; ++p
check_p_lt_stop
ldr r4, [sp, #0] ; stop
cmp r1, r4 ; while( p < stop)
bcc while_p_lt_stop
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
add sp, sp, #16
pop {r4-r12, pc}
ENDP
END
@@ -1,352 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; needs vp8_writer in r0
; start shall not be in r1
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end]
ldr r3, [r0, #vp8_writer_error]
mov r1, $pos
mov r0, $start
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr}
MEND
; r0 VP8_COMP *cpi
; r1 vp8_writer *w
; r2 vp8_coef_encodings
; r3 vp8_extra_bits
; s0 vp8_coef_tree
|vp8cx_pack_mb_row_tokens_armv5| PROC
push {r4-r12, lr}
sub sp, sp, #24
; Compute address of cpi->common.mb_rows
ldr r4, _VP8_COMP_common_
ldr r6, _VP8_COMMON_MBrows_
add r4, r0, r4
ldr r5, [r4, r6] ; load up mb_rows
str r2, [sp, #20] ; save vp8_coef_encodings
str r5, [sp, #12] ; save mb_rows
str r3, [sp, #8] ; save vp8_extra_bits
ldr r4, _VP8_COMP_tplist_
add r4, r0, r4
ldr r7, [r4, #0] ; dereference cpi->tp_list
mov r0, r1 ; keep same as other loops
ldr r2, [r0, #vp8_writer_lowvalue]
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
mb_row_loop
ldr r1, [r7, #tokenlist_start]
ldr r9, [r7, #tokenlist_stop]
str r9, [sp, #0] ; save stop for later comparison
str r7, [sp, #16] ; tokenlist address for next time
b check_p_lt_stop
; actuall work gets done here!
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #20] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
; vp8 specific skip_eob_node
cmp r7, #0
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #64] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
sub r7, r5, #1 ; range-1
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
lsls r12, r12, #1 ; bb = v >> n
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
; if bb == 1, otherwise it will act like i + 0
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start
token_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq token_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4] ; w->buffer[x]
add r10, r10, #1
strb r10, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
ldr r10, [sp, #64] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #8] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
beq skip_extra_bits
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits
ldr r9, [r12, #vp8_extra_bit_struct_prob]
asr r7, lr, #1 ; v=e>>1
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
rsb r4, r8, #32
lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
lsls r12, r12, #1 ; v >> n
mul r6, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
clz r6, r4
sub r6, r6, #24
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi extra_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset= shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl extra_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos - 1
b extra_zero_while_start
extra_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
extra_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq extra_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4]
add r10, r10, #1
strb r10, [r7, r4]
extra_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos]
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
lsl r2, r2, r6
subs r8, r8, #1 ; --n
bne extra_bits_loop ; while (n)
no_extra_bits
ldr lr, [r1, #4] ; e = p->Extra
add r4, r5, #1 ; range + 1
tst lr, #1
lsr r4, r4, #1 ; split = (range + 1) >> 1
addne r2, r2, r4 ; lowvalue += split
subne r4, r5, r4 ; range = range-split
tst r2, #0x80000000 ; lowvalue & 0x80000000
lsl r5, r4, #1 ; range <<= 1
beq end_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos]
mov r7, #0
sub r4, r4, #1
b end_zero_while_start
end_zero_while_loop
strb r7, [r6, r4]
sub r4, r4, #1 ; x--
end_zero_while_start
cmp r4, #0
ldrge r6, [r0, #vp8_writer_buffer]
ldrb r12, [r6, r4]
cmpge r12, #0xff
beq end_zero_while_loop
ldr r6, [r0, #vp8_writer_buffer]
ldrb r7, [r6, r4]
add r7, r7, #1
strb r7, [r6, r4]
end_high_bit_not_set
adds r3, r3, #1 ; ++count
lsl r2, r2, #1 ; lowvalue <<= 1
bne end_count_zero
ldr r4, [r0, #vp8_writer_pos]
mvn r3, #7
ldr r7, [r0, #vp8_writer_buffer]
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
add r1, r1, #TOKENEXTRA_SZ ; ++p
check_p_lt_stop
ldr r4, [sp, #0] ; stop
cmp r1, r4 ; while( p < stop)
bcc while_p_lt_stop
ldr r6, [sp, #12] ; mb_rows
ldr r7, [sp, #16] ; tokenlist address
subs r6, r6, #1
add r7, r7, #TOKENLIST_SZ ; next element in the array
str r6, [sp, #12]
bne mb_row_loop
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
add sp, sp, #24
pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
DCD vp8_comp_common
_VP8_COMMON_MBrows_
DCD vp8_common_mb_rows
_VP8_COMP_tplist_
DCD vp8_comp_tplist
END
@@ -1,471 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; needs vp8_writer in r0
; start shall not be in r1
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end]
ldr r3, [r0, #vp8_writer_error]
mov r1, $pos
mov r0, $start
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr}
MEND
; r0 VP8_COMP *cpi
; r1 unsigned char *cx_data
; r2 const unsigned char *cx_data_end
; r3 int num_part
; s0 vp8_coef_encodings
; s1 vp8_extra_bits,
; s2 const vp8_tree_index *
|vp8cx_pack_tokens_into_partitions_armv5| PROC
push {r4-r12, lr}
sub sp, sp, #40
; Compute address of cpi->common.mb_rows
ldr r4, _VP8_COMP_common_
ldr r6, _VP8_COMMON_MBrows_
add r4, r0, r4
ldr r5, [r4, r6] ; load up mb_rows
str r5, [sp, #36] ; save mb_rows
str r1, [sp, #24] ; save ptr = cx_data
str r3, [sp, #20] ; save num_part
str r2, [sp, #8] ; save cx_data_end
ldr r4, _VP8_COMP_tplist_
add r4, r0, r4
ldr r7, [r4, #0] ; dereference cpi->tp_list
str r7, [sp, #32] ; store start of cpi->tp_list
ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
add r0, r0, r11
mov r11, #0
str r11, [sp, #28] ; i
numparts_loop
ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
add r0, r2 ; bc[i + 1]
ldr r10, [sp, #24] ; ptr
ldr r5, [sp, #36] ; move mb_rows to the counting section
subs r5, r5, r11 ; move start point with each partition
; mb_rows starts at i
str r5, [sp, #12]
; Reset all of the VP8 Writer data for each partition that
; is processed.
; start_encode
ldr r3, [sp, #8]
str r3, [r0, #vp8_writer_buffer_end]
mov r2, #0 ; vp8_writer_lowvalue
mov r5, #255 ; vp8_writer_range
mvn r3, #23 ; vp8_writer_count
str r2, [r0, #vp8_writer_pos]
str r10, [r0, #vp8_writer_buffer]
ble end_partition ; if (mb_rows <= 0) end partition
mb_row_loop
ldr r1, [r7, #tokenlist_start]
ldr r9, [r7, #tokenlist_stop]
str r9, [sp, #0] ; save stop for later comparison
str r7, [sp, #16] ; tokenlist address for next time
b check_p_lt_stop
; actual work gets done here!
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #80] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
; vp8 specific skip_eob_node
cmp r7, #0
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #88] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
sub r7, r5, #1 ; range-1
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
lsls r12, r12, #1 ; bb = v >> n
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
; if bb == 1, otherwise it will act like i + 0
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start
token_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq token_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4] ; w->buffer[x]
add r10, r10, #1
strb r10, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
ldr r10, [sp, #88] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #84] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
beq skip_extra_bits
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits
ldr r9, [r12, #vp8_extra_bit_struct_prob]
asr r7, lr, #1 ; v=e>>1
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
rsb r4, r8, #32
lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
lsls r12, r12, #1 ; v >> n
mul r6, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
mov r7, #1
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
addcs r2, r2, r4 ; if (bb) lowvalue += split
subcs r4, r5, r4 ; if (bb) range = range-split
clz r6, r4
sub r6, r6, #24
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi extra_count_lt_zero ; if(count >= 0)
sub r6, r6, r3 ; offset= shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl extra_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos - 1
b extra_zero_while_start
extra_zero_while_loop
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
extra_zero_while_start
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq extra_zero_while_loop
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4]
add r10, r10, #1
strb r10, [r7, r4]
extra_high_bit_not_set
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos]
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
lsl r2, r2, r6
subs r8, r8, #1 ; --n
bne extra_bits_loop ; while (n)
no_extra_bits
ldr lr, [r1, #4] ; e = p->Extra
add r4, r5, #1 ; range + 1
tst lr, #1
lsr r4, r4, #1 ; split = (range + 1) >> 1
addne r2, r2, r4 ; lowvalue += split
subne r4, r5, r4 ; range = range-split
tst r2, #0x80000000 ; lowvalue & 0x80000000
lsl r5, r4, #1 ; range <<= 1
beq end_high_bit_not_set
ldr r4, [r0, #vp8_writer_pos]
mov r7, #0
sub r4, r4, #1
b end_zero_while_start
end_zero_while_loop
strb r7, [r6, r4]
sub r4, r4, #1 ; x--
end_zero_while_start
cmp r4, #0
ldrge r6, [r0, #vp8_writer_buffer]
ldrb r12, [r6, r4]
cmpge r12, #0xff
beq end_zero_while_loop
ldr r6, [r0, #vp8_writer_buffer]
ldrb r7, [r6, r4]
add r7, r7, #1
strb r7, [r6, r4]
end_high_bit_not_set
adds r3, r3, #1 ; ++count
lsl r2, r2, #1 ; lowvalue <<= 1
bne end_count_zero
ldr r4, [r0, #vp8_writer_pos]
mvn r3, #7 ; count = -8
ldr r7, [r0, #vp8_writer_buffer]
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
add r1, r1, #TOKENEXTRA_SZ ; ++p
check_p_lt_stop
ldr r4, [sp, #0] ; stop
cmp r1, r4 ; while( p < stop)
bcc while_p_lt_stop
ldr r10, [sp, #20] ; num_parts
mov r1, #TOKENLIST_SZ
mul r1, r10, r1
ldr r6, [sp, #12] ; mb_rows
ldr r7, [sp, #16] ; tokenlist address
subs r6, r6, r10
add r7, r7, r1 ; next element in the array
str r6, [sp, #12]
bgt mb_row_loop
end_partition
mov r12, #32
stop_encode_loop
sub r7, r5, #1 ; range-1
mov r4, r7, lsl #7 ; ((range-1) * 128)
mov r7, #1
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
; Counting the leading zeros is used to normalize range.
clz r6, r4
sub r6, r6, #24 ; shift
; Flag is set on the sum of count. This flag is used later
; to determine if count >= 0
adds r3, r3, r6 ; count += shift
lsl r5, r4, r6 ; range <<= shift
bmi token_count_lt_zero_se ; if(count >= 0)
sub r6, r6, r3 ; offset = shift - count
sub r4, r6, #1 ; offset-1
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
bpl token_high_bit_not_set_se
ldr r4, [r0, #vp8_writer_pos] ; x
sub r4, r4, #1 ; x = w->pos-1
b token_zero_while_start_se
token_zero_while_loop_se
mov r10, #0
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
sub r4, r4, #1 ; x--
token_zero_while_start_se
cmp r4, #0
ldrge r7, [r0, #vp8_writer_buffer]
ldrb r11, [r7, r4]
cmpge r11, #0xff
beq token_zero_while_loop_se
ldr r7, [r0, #vp8_writer_buffer]
ldrb r10, [r7, r4] ; w->buffer[x]
add r10, r10, #1
strb r10, [r7, r4] ; w->buffer[x] + 1
token_high_bit_not_set_se
rsb r4, r6, #24 ; 24-offset
ldr r10, [r0, #vp8_writer_buffer]
lsr r7, r2, r4 ; lowvalue >> (24-offset)
ldr r4, [r0, #vp8_writer_pos] ; w->pos
lsl r2, r2, r6 ; lowvalue <<= offset
mov r6, r3 ; shift = count
add r11, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
lsl r2, r2, r6 ; lowvalue <<= shift
subs r12, r12, #1
bne stop_encode_loop
ldr r4, [r0, #vp8_writer_pos] ; w->pos
ldr r12, [sp, #24] ; ptr
add r12, r12, r4 ; ptr += w->pos
str r12, [sp, #24]
ldr r11, [sp, #28] ; i
ldr r10, [sp, #20] ; num_parts
add r11, r11, #1 ; i++
str r11, [sp, #28]
ldr r7, [sp, #32] ; cpi->tp_list[i]
mov r1, #TOKENLIST_SZ
add r7, r7, r1 ; next element in cpi->tp_list
str r7, [sp, #32] ; cpi->tp_list[i+1]
cmp r10, r11
bgt numparts_loop
add sp, sp, #40
pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
DCD vp8_comp_common
_VP8_COMMON_MBrows_
DCD vp8_common_mb_rows
_VP8_COMP_tplist_
DCD vp8_comp_tplist
_VP8_COMP_bc_
DCD vp8_comp_bc
_vp8_writer_sz_
DCD vp8_writer_sz
END
@@ -1,225 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_fast_quantize_b_armv6|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 BLOCK *b
; r1 BLOCKD *d
|vp8_fast_quantize_b_armv6| PROC
stmfd sp!, {r1, r4-r11, lr}
ldr r3, [r0, #vp8_block_coeff] ; coeff
ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
ldr r5, [r0, #vp8_block_round] ; round
ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
ldr r8, [r1, #vp8_blockd_dequant] ; dequant
ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
; is used to update the counter so that
; it can be used to mark nonzero
; quantized coefficient pairs.
mov r1, #0 ; flags for quantized coeffs
; PART 1: quantization and dequantization loop
loop
ldr r9, [r3], #4 ; [z1 | z0]
ldr r10, [r5], #4 ; [r1 | r0]
ldr r11, [r4], #4 ; [q1 | q0]
ssat16 lr, #1, r9 ; [sz1 | sz0]
eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
ssub16 r9, r9, lr ; x = (z ^ sz) - sz
sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
ldr r12, [r3], #4 ; [z3 | z2]
smulbb r0, r9, r11 ; [(x0+r0)*q0]
smultt r9, r9, r11 ; [(x1+r1)*q1]
ldr r10, [r5], #4 ; [r3 | r2]
ssat16 r11, #1, r12 ; [sz3 | sz2]
eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
ldr r9, [r4], #4 ; [q3 | q2]
ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
smulbb r10, r12, r9 ; [(x2+r2)*q2]
smultt r12, r12, r9 ; [(x3+r3)*q3]
ssub16 r0, r0, lr ; x = (y ^ sz) - sz
cmp r0, #0 ; check if zero
orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
str r0, [r6], #4 ; *qcoeff++ = x
ldr r9, [r8], #4 ; [dq1 | dq0]
pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
cmp r10, #0 ; check if zero
orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
str r10, [r6], #4 ; *qcoeff++ = x
ldr r11, [r8], #4 ; [dq3 | dq2]
smulbb r12, r0, r9 ; [x0*dq0]
smultt r0, r0, r9 ; [x1*dq1]
smulbb r9, r10, r11 ; [x2*dq2]
smultt r10, r10, r11 ; [x3*dq3]
lsls r2, r2, #2 ; update loop counter
strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
add r7, r7, #8 ; dqcoeff += 8
bne loop
; PART 2: check position for eob...
ldr r11, [sp, #0] ; restore BLOCKD pointer
mov lr, #0 ; init eob
cmp r1, #0 ; coeffs after quantization?
ldr r12, [r11, #vp8_blockd_eob]
beq end ; skip eob calculations if all zero
ldr r0, [r11, #vp8_blockd_qcoeff]
; check shortcut for nonzero qcoeffs
tst r1, #0x80
bne quant_coeff_15_14
tst r1, #0x20
bne quant_coeff_13_11
tst r1, #0x8
bne quant_coeff_12_7
tst r1, #0x40
bne quant_coeff_10_9
tst r1, #0x10
bne quant_coeff_8_3
tst r1, #0x2
bne quant_coeff_6_5
tst r1, #0x4
bne quant_coeff_4_2
b quant_coeff_1_0
quant_coeff_15_14
ldrh r2, [r0, #30] ; rc=15, i=15
mov lr, #16
cmp r2, #0
bne end
ldrh r3, [r0, #28] ; rc=14, i=14
mov lr, #15
cmp r3, #0
bne end
quant_coeff_13_11
ldrh r2, [r0, #22] ; rc=11, i=13
mov lr, #14
cmp r2, #0
bne end
quant_coeff_12_7
ldrh r3, [r0, #14] ; rc=7, i=12
mov lr, #13
cmp r3, #0
bne end
ldrh r2, [r0, #20] ; rc=10, i=11
mov lr, #12
cmp r2, #0
bne end
quant_coeff_10_9
ldrh r3, [r0, #26] ; rc=13, i=10
mov lr, #11
cmp r3, #0
bne end
ldrh r2, [r0, #24] ; rc=12, i=9
mov lr, #10
cmp r2, #0
bne end
quant_coeff_8_3
ldrh r3, [r0, #18] ; rc=9, i=8
mov lr, #9
cmp r3, #0
bne end
ldrh r2, [r0, #12] ; rc=6, i=7
mov lr, #8
cmp r2, #0
bne end
quant_coeff_6_5
ldrh r3, [r0, #6] ; rc=3, i=6
mov lr, #7
cmp r3, #0
bne end
ldrh r2, [r0, #4] ; rc=2, i=5
mov lr, #6
cmp r2, #0
bne end
quant_coeff_4_2
ldrh r3, [r0, #10] ; rc=5, i=4
mov lr, #5
cmp r3, #0
bne end
ldrh r2, [r0, #16] ; rc=8, i=3
mov lr, #4
cmp r2, #0
bne end
ldrh r3, [r0, #8] ; rc=4, i=2
mov lr, #3
cmp r3, #0
bne end
quant_coeff_1_0
ldrh r2, [r0, #2] ; rc=1, i=1
mov lr, #2
cmp r2, #0
bne end
mov lr, #1 ; rc=0, i=0
end
strb lr, [r12]
ldmfd sp!, {r1, r4-r11, pc}
ENDP
loop_count
DCD 0x1000000
END
@@ -1,138 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_mse16x16_armv6|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
;
;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
; So, we can remove this part of calculation.
|vp8_mse16x16_armv6| PROC
push {r4-r9, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #16 ; set loop counter to 16 (=block height)
mov r4, #0 ; initialize sse = 0
loop
; 1st 4 pixels
ldr r5, [r0, #0x0] ; load 4 src pixels
ldr r6, [r2, #0x0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r8, r5, r6 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0x4] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r6, [r2, #0x4] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0x8] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 3rd 4 pixels
ldr r6, [r2, #0x8] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0xc] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 4th 4 pixels
ldr r6, [r2, #0xc] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
add r2, r2, r3 ; set dst_ptr to next row
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
subs r12, r12, #1 ; next row
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
bne loop
; return stuff
ldr r1, [sp, #28] ; get address of sse
mov r0, r4 ; return sse
str r4, [r1] ; store sse
pop {r4-r9, pc}
ENDP
END
@@ -1,272 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_subtract_mby_armv6|
EXPORT |vp8_subtract_mbuv_armv6|
EXPORT |vp8_subtract_b_armv6|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 BLOCK *be
; r1 BLOCKD *bd
; r2 int pitch
|vp8_subtract_b_armv6| PROC
stmfd sp!, {r4-r9}
ldr r4, [r0, #vp8_block_base_src]
ldr r5, [r0, #vp8_block_src]
ldr r6, [r0, #vp8_block_src_diff]
ldr r3, [r4]
ldr r7, [r0, #vp8_block_src_stride]
add r3, r3, r5 ; src = *base_src + src
ldr r8, [r1, #vp8_blockd_predictor]
mov r9, #4 ; loop count
loop_block
ldr r0, [r3], r7 ; src
ldr r1, [r8], r2 ; pred
uxtb16 r4, r0 ; [s2 | s0]
uxtb16 r5, r1 ; [p2 | p0]
uxtb16 r0, r0, ror #8 ; [s3 | s1]
uxtb16 r1, r1, ror #8 ; [p3 | p1]
usub16 r4, r4, r5 ; [d2 | d0]
usub16 r5, r0, r1 ; [d3 | d1]
subs r9, r9, #1 ; decrement loop counter
pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
str r0, [r6, #0] ; diff
str r1, [r6, #4] ; diff
add r6, r6, r2, lsl #1 ; update diff pointer
bne loop_block
ldmfd sp!, {r4-r9}
mov pc, lr
ENDP
; r0 short *diff
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
; r3 int src_stride
; sp unsigned char *upred
; sp unsigned char *vpred
; sp int pred_stride
|vp8_subtract_mbuv_armv6| PROC
stmfd sp!, {r4-r11}
add r0, r0, #512 ; set *diff point to Cb
mov r4, #8 ; loop count
ldr r5, [sp, #32] ; upred
ldr r12, [sp, #40] ; pred_stride
; Subtract U block
loop_u
ldr r6, [r1] ; usrc (A)
ldr r7, [r5] ; upred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
ldr r10, [r1, #4] ; usrc (B)
ldr r11, [r5, #4] ; upred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
str r8, [r0], #4 ; diff (A)
uxtb16 r8, r10 ; [s2 | s0] (B)
str r9, [r0], #4 ; diff (A)
uxtb16 r9, r11 ; [p2 | p0] (B)
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
add r1, r1, r3 ; update usrc pointer
add r5, r5, r12 ; update upred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
str r8, [r0], #4 ; diff (B)
subs r4, r4, #1 ; update loop counter
str r9, [r0], #4 ; diff (B)
bne loop_u
ldr r5, [sp, #36] ; vpred
mov r4, #8 ; loop count
; Subtract V block
loop_v
ldr r6, [r2] ; vsrc (A)
ldr r7, [r5] ; vpred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
ldr r10, [r2, #4] ; vsrc (B)
ldr r11, [r5, #4] ; vpred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
str r8, [r0], #4 ; diff (A)
uxtb16 r8, r10 ; [s2 | s0] (B)
str r9, [r0], #4 ; diff (A)
uxtb16 r9, r11 ; [p2 | p0] (B)
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
add r2, r2, r3 ; update vsrc pointer
add r5, r5, r12 ; update vpred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
str r8, [r0], #4 ; diff (B)
subs r4, r4, #1 ; update loop counter
str r9, [r0], #4 ; diff (B)
bne loop_v
ldmfd sp!, {r4-r11}
bx lr
ENDP
; r0 short *diff
; r1 unsigned char *src
; r2 int src_stride
; r3 unsigned char *pred
; sp int pred_stride
|vp8_subtract_mby_armv6| PROC
stmfd sp!, {r4-r11}
ldr r12, [sp, #32] ; pred_stride
mov r4, #16
loop
ldr r6, [r1] ; src (A)
ldr r7, [r3] ; pred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
ldr r10, [r1, #4] ; src (B)
ldr r11, [r3, #4] ; pred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
str r8, [r0], #4 ; diff (A)
uxtb16 r8, r10 ; [s2 | s0] (B)
str r9, [r0], #4 ; diff (A)
uxtb16 r9, r11 ; [p2 | p0] (B)
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
ldr r10, [r1, #8] ; src (C)
ldr r11, [r3, #8] ; pred (C)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
str r8, [r0], #4 ; diff (B)
uxtb16 r8, r10 ; [s2 | s0] (C)
str r9, [r0], #4 ; diff (B)
uxtb16 r9, r11 ; [p2 | p0] (C)
uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
usub16 r6, r8, r9 ; [d2 | d0] (C)
usub16 r7, r10, r11 ; [d3 | d1] (C)
ldr r10, [r1, #12] ; src (D)
ldr r11, [r3, #12] ; pred (D)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
str r8, [r0], #4 ; diff (C)
uxtb16 r8, r10 ; [s2 | s0] (D)
str r9, [r0], #4 ; diff (C)
uxtb16 r9, r11 ; [p2 | p0] (D)
uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
usub16 r6, r8, r9 ; [d2 | d0] (D)
usub16 r7, r10, r11 ; [d3 | d1] (D)
add r1, r1, r2 ; update src pointer
add r3, r3, r12 ; update pred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
str r8, [r0], #4 ; diff (D)
subs r4, r4, #1 ; update loop counter
str r9, [r0], #4 ; diff (D)
bne loop
ldmfd sp!, {r4-r11}
bx lr
ENDP
END
@@ -1,41 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp8/encoder/boolhuff.h"
#include "vpx/internal/vpx_codec_internal.h"
const unsigned int vp8_prob_cost[256] =
{
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
};
int vp8_validate_buffer_arm(const unsigned char *start,
size_t len,
const unsigned char *end,
struct vpx_internal_error_info *error)
{
return validate_buffer(start, len, end, error);
}
@@ -1,258 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_fast_quantize_b_neon|
EXPORT |vp8_fast_quantize_b_pair_neon|
INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=4
;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
|vp8_fast_quantize_b_pair_neon| PROC
stmfd sp!, {r4-r9}
vstmdb sp!, {q4-q7}
ldr r4, [r0, #vp8_block_coeff]
ldr r5, [r0, #vp8_block_quant_fast]
ldr r6, [r0, #vp8_block_round]
vld1.16 {q0, q1}, [r4@128] ; load z
ldr r7, [r2, #vp8_blockd_qcoeff]
vabs.s16 q4, q0 ; calculate x = abs(z)
vabs.s16 q5, q1
;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
vshr.s16 q2, q0, #15 ; sz
vshr.s16 q3, q1, #15
vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15]
vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15]
ldr r4, [r1, #vp8_block_coeff]
vadd.s16 q4, q6 ; x + Round
vadd.s16 q5, q7
vld1.16 {q0, q1}, [r4@128] ; load z2
vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16
vqdmulh.s16 q5, q9
vabs.s16 q10, q0 ; calculate x2 = abs(z_2)
vabs.s16 q11, q1
vshr.s16 q12, q0, #15 ; sz2
vshr.s16 q13, q1, #15
;modify data to have its original sign
veor.s16 q4, q2 ; y^sz
veor.s16 q5, q3
vadd.s16 q10, q6 ; x2 + Round
vadd.s16 q11, q7
ldr r8, [r2, #vp8_blockd_dequant]
vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16
vqdmulh.s16 q11, q9
vshr.s16 q4, #1 ; right shift 1 after vqdmulh
vshr.s16 q5, #1
vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i]
vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
vsub.s16 q5, q3
vshr.s16 q10, #1 ; right shift 1 after vqdmulh
vshr.s16 q11, #1
ldr r9, [r2, #vp8_blockd_dqcoeff]
veor.s16 q10, q12 ; y2^sz2
veor.s16 q11, q13
vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1
vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
vsub.s16 q11, q13
ldr r6, [r3, #vp8_blockd_qcoeff]
vmul.s16 q2, q6, q4 ; x * Dequant
vmul.s16 q3, q7, q5
adr r0, inv_zig_zag ; load ptr of inverse zigzag table
vceq.s16 q8, q8 ; set q8 to all 1
vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2
vmul.s16 q12, q6, q10 ; x2 * Dequant
vmul.s16 q13, q7, q11
vld1.16 {q6, q7}, [r0@128] ; load inverse scan order
vtst.16 q14, q4, q8 ; now find eob
vtst.16 q15, q5, q8 ; non-zero element is set to all 1
vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant
ldr r7, [r3, #vp8_blockd_dqcoeff]
vand q0, q6, q14 ; get all valid numbers from scan array
vand q1, q7, q15
vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant
vtst.16 q2, q10, q8 ; now find eob
vtst.16 q3, q11, q8 ; non-zero element is set to all 1
vmax.u16 q0, q0, q1 ; find maximum value in q0, q1
vand q10, q6, q2 ; get all valid numbers from scan array
vand q11, q7, q3
vmax.u16 q10, q10, q11 ; find maximum value in q10, q11
vmax.u16 d0, d0, d1
vmax.u16 d20, d20, d21
vmovl.u16 q0, d0
vmovl.u16 q10, d20
vmax.u32 d0, d0, d1
vmax.u32 d20, d20, d21
vpmax.u32 d0, d0, d0
vpmax.u32 d20, d20, d20
ldr r4, [r2, #vp8_blockd_eob]
ldr r5, [r3, #vp8_blockd_eob]
vst1.8 {d0[0]}, [r4] ; store eob
vst1.8 {d20[0]}, [r5] ; store eob
vldmia sp!, {q4-q7}
ldmfd sp!, {r4-r9}
bx lr
ENDP
;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|vp8_fast_quantize_b_neon| PROC
stmfd sp!, {r4-r7}
ldr r3, [r0, #vp8_block_coeff]
ldr r4, [r0, #vp8_block_quant_fast]
ldr r5, [r0, #vp8_block_round]
vld1.16 {q0, q1}, [r3@128] ; load z
vorr.s16 q14, q0, q1 ; check if all zero (step 1)
ldr r6, [r1, #vp8_blockd_qcoeff]
ldr r7, [r1, #vp8_blockd_dqcoeff]
vorr.s16 d28, d28, d29 ; check if all zero (step 2)
vabs.s16 q12, q0 ; calculate x = abs(z)
vabs.s16 q13, q1
;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
vshr.s16 q2, q0, #15 ; sz
vmov r2, r3, d28 ; check if all zero (step 3)
vshr.s16 q3, q1, #15
vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15]
vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15]
vadd.s16 q12, q14 ; x + Round
vadd.s16 q13, q15
adr r0, inv_zig_zag ; load ptr of inverse zigzag table
vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16
vqdmulh.s16 q13, q9
vld1.16 {q10, q11}, [r0@128]; load inverse scan order
vceq.s16 q8, q8 ; set q8 to all 1
ldr r4, [r1, #vp8_blockd_dequant]
vshr.s16 q12, #1 ; right shift 1 after vqdmulh
vshr.s16 q13, #1
ldr r5, [r1, #vp8_blockd_eob]
orr r2, r2, r3 ; check if all zero (step 4)
cmp r2, #0 ; check if all zero (step 5)
beq zero_output ; check if all zero (step 6)
;modify data to have its original sign
veor.s16 q12, q2 ; y^sz
veor.s16 q13, q3
vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
vsub.s16 q13, q3
vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i]
vtst.16 q14, q12, q8 ; now find eob
vtst.16 q15, q13, q8 ; non-zero element is set to all 1
vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1
vand q10, q10, q14 ; get all valid numbers from scan array
vand q11, q11, q15
vmax.u16 q0, q10, q11 ; find maximum value in q0, q1
vmax.u16 d0, d0, d1
vmovl.u16 q0, d0
vmul.s16 q2, q12 ; x * Dequant
vmul.s16 q3, q13
vmax.u32 d0, d0, d1
vpmax.u32 d0, d0, d0
vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant
vst1.8 {d0[0]}, [r5] ; store eob
ldmfd sp!, {r4-r7}
bx lr
zero_output
strb r2, [r5] ; store eob
vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0
vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0
ldmfd sp!, {r4-r7}
bx lr
ENDP
; default inverse zigzag table is defined in vp8/common/entropy.c
ALIGN 16 ; enable use of @128 bit aligned loads
inv_zig_zag
DCW 0x0001, 0x0002, 0x0006, 0x0007
DCW 0x0003, 0x0005, 0x0008, 0x000d
DCW 0x0004, 0x0009, 0x000c, 0x000e
DCW 0x000a, 0x000b, 0x000f, 0x0010
END
@@ -0,0 +1,89 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "vp8/encoder/block.h"
static const uint16_t inv_zig_zag[16] = {
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
};
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
const int16x8_t one_q = vdupq_n_s16(-1),
z0 = vld1q_s16(b->coeff),
z1 = vld1q_s16(b->coeff + 8),
round0 = vld1q_s16(b->round),
round1 = vld1q_s16(b->round + 8),
quant0 = vld1q_s16(b->quant_fast),
quant1 = vld1q_s16(b->quant_fast + 8),
dequant0 = vld1q_s16(d->dequant),
dequant1 = vld1q_s16(d->dequant + 8);
const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag),
zig_zag1 = vld1q_u16(inv_zig_zag + 8);
int16x8_t x0, x1, sz0, sz1, y0, y1;
uint16x8_t eob0, eob1;
uint16x4_t eob_d16;
uint32x2_t eob_d32;
uint32x4_t eob_q32;
/* sign of z: z >> 15 */
sz0 = vshrq_n_s16(z0, 15);
sz1 = vshrq_n_s16(z1, 15);
/* x = abs(z) */
x0 = vabsq_s16(z0);
x1 = vabsq_s16(z1);
/* x += round */
x0 = vaddq_s16(x0, round0);
x1 = vaddq_s16(x1, round1);
/* y = 2 * (x * quant) >> 16 */
y0 = vqdmulhq_s16(x0, quant0);
y1 = vqdmulhq_s16(x1, quant1);
/* Compensate for doubling in vqdmulhq */
y0 = vshrq_n_s16(y0, 1);
y1 = vshrq_n_s16(y1, 1);
/* Restore sign bit */
y0 = veorq_s16(y0, sz0);
y1 = veorq_s16(y1, sz1);
x0 = vsubq_s16(y0, sz0);
x1 = vsubq_s16(y1, sz1);
/* find non-zero elements */
eob0 = vtstq_s16(x0, one_q);
eob1 = vtstq_s16(x1, one_q);
/* mask zig zag */
eob0 = vandq_u16(eob0, zig_zag0);
eob1 = vandq_u16(eob1, zig_zag1);
/* select the largest value */
eob0 = vmaxq_u16(eob0, eob1);
eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
eob_q32 = vmovl_u16(eob_d16);
eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32));
eob_d32 = vpmax_u32(eob_d32, eob_d32);
/* qcoeff = x */
vst1q_s16(d->qcoeff, x0);
vst1q_s16(d->qcoeff + 8, x1);
/* dqcoeff = x * dequant */
vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0));
vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1));
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
}
@@ -1,131 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
unsigned int vp8_mse16x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
int64x1_t d0s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int64x2_t q1s64;
q7s32 = vdupq_n_s32(0);
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q7s32 = vaddq_s32(q7s32, q8s32);
q9s32 = vaddq_s32(q9s32, q10s32);
q10s32 = vaddq_s32(q7s32, q9s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
unsigned int vp8_get4x4sse_cs_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride) {
int16x4_t d22s16, d24s16, d26s16, d28s16;
int64x1_t d0s64;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int64x2_t q1s64;
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d1u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d5u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d3u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d7u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);
q13u16 = vsubl_u8(d2u8, d6u8);
q14u16 = vsubl_u8(d3u8, d7u8);
d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
q7s32 = vmull_s16(d22s16, d22s16);
q8s32 = vmull_s16(d24s16, d24s16);
q9s32 = vmull_s16(d26s16, d26s16);
q10s32 = vmull_s16(d28s16, d28s16);
q7s32 = vaddq_s32(q7s32, q8s32);
q9s32 = vaddq_s32(q9s32, q10s32);
q9s32 = vaddq_s32(q7s32, q9s32);
q1s64 = vpaddlq_s32(q9s32);
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
@@ -1,64 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/encoder/block.h"
#include <math.h>
#include "vpx_mem/vpx_mem.h"
#include "vp8/encoder/quantize.h"
#include "vp8/common/entropy.h"
#if HAVE_NEON
/* vp8_quantize_mbX functions here differs from corresponding ones in
* quantize.c only by using quantize_b_pair function pointer instead of
* the regular quantize_b function pointer */
void vp8_quantize_mby_neon(MACROBLOCK *x)
{
int i;
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
for (i = 0; i < 16; i+=2)
x->quantize_b_pair(&x->block[i], &x->block[i+1],
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
if(has_2nd_order)
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
void vp8_quantize_mb_neon(MACROBLOCK *x)
{
int i;
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
for (i = 0; i < 24; i+=2)
x->quantize_b_pair(&x->block[i], &x->block[i+1],
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
if (has_2nd_order)
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
void vp8_quantize_mbuv_neon(MACROBLOCK *x)
{
int i;
for (i = 16; i < 24; i+=2)
x->quantize_b_pair(&x->block[i], &x->block[i+1],
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
}
#endif /* HAVE_NEON */
+8 -8
View File
@@ -159,7 +159,7 @@ static void write_split(vp8_writer *bc, int x)
);
}
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *stop = p + xcount;
unsigned int split;
@@ -374,7 +374,7 @@ static void write_partition_size(unsigned char *cx_data, int size)
}
static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data,
unsigned char * cx_data_end,
int num_part)
{
@@ -398,7 +398,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = (int)(stop - p);
vp8_pack_tokens_c(w, p, tokens);
vp8_pack_tokens(w, p, tokens);
}
vp8_stop_encode(w);
@@ -407,7 +407,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
}
static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w)
{
int mb_row;
@@ -417,7 +417,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = (int)(stop - p);
vp8_pack_tokens_c(w, p, tokens);
vp8_pack_tokens(w, p, tokens);
}
}
@@ -1543,7 +1543,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
if (pc->refresh_entropy_probs == 0)
{
/* save a copy for later refresh */
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
}
vp8_update_coef_probs(cpi);
@@ -1620,7 +1620,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
/* concatenate partition buffers */
for(i = 0; i < num_part; i++)
{
vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
cpi->partition_d[i+1] = dp;
dp += cpi->partition_sz[i+1];
}
@@ -1676,7 +1676,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
pack_mb_row_tokens(cpi, &cpi->bc[1]);
else
#endif
pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
vp8_stop_encode(&cpi->bc[1]);
+1 -30
View File
@@ -16,36 +16,7 @@
extern "C" {
#endif
#if HAVE_EDSP
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
unsigned char * cx_data,
const unsigned char *cx_data_end,
int num_parts,
vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_tokens_into_partitions(a,b,c,d) \
vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_mb_row_tokens(a,b) \
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount);
#ifdef __cplusplus
} // extern "C"
+4 -1
View File
@@ -126,6 +126,8 @@ typedef struct macroblock
int optimize;
int q_index;
int is_skin;
int denoise_zeromv;
#if CONFIG_TEMPORAL_DENOISING
int increase_denoising;
@@ -161,8 +163,9 @@ typedef struct macroblock
void (*short_fdct8x4)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);
void (*quantize_b)(BLOCK *b, BLOCKD *d);
void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
unsigned int mbs_zero_last_dot_suppress;
int zero_last_dot_suppress;
} MACROBLOCK;
+2
View File
@@ -11,6 +11,8 @@
#include <math.h>
#include "./vp8_rtcd.h"
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
int i;
+31 -24
View File
@@ -374,7 +374,7 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
} else if (mode == 3) {
denoiser->denoiser_mode = kDenoiserOnYUVAggressive;
} else {
denoiser->denoiser_mode = kDenoiserOnAdaptive;
denoiser->denoiser_mode = kDenoiserOnYUV;
}
if (denoiser->denoiser_mode != kDenoiserOnYUVAggressive) {
denoiser->denoise_pars.scale_sse_thresh = 1;
@@ -390,10 +390,10 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
denoiser->denoise_pars.scale_motion_thresh = 16;
denoiser->denoise_pars.scale_increase_filter = 1;
denoiser->denoise_pars.denoise_mv_bias = 60;
denoiser->denoise_pars.pickmode_mv_bias = 60;
denoiser->denoise_pars.qp_thresh = 100;
denoiser->denoise_pars.consec_zerolast = 10;
denoiser->denoise_pars.spatial_blur = 20;
denoiser->denoise_pars.pickmode_mv_bias = 75;
denoiser->denoise_pars.qp_thresh = 80;
denoiser->denoise_pars.consec_zerolast = 15;
denoiser->denoise_pars.spatial_blur = 0;
}
}
@@ -415,8 +415,8 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
vp8_denoiser_free(denoiser);
return 1;
}
vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
denoiser->yv12_running_avg[i].frame_size);
memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
denoiser->yv12_running_avg[i].frame_size);
}
denoiser->yv12_mc_running_avg.flags = 0;
@@ -428,19 +428,19 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
return 1;
}
vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
denoiser->yv12_mc_running_avg.frame_size);
memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
denoiser->yv12_mc_running_avg.frame_size);
if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width,
height, VP8BORDERINPIXELS) < 0) {
vp8_denoiser_free(denoiser);
return 1;
}
vpx_memset(denoiser->yv12_last_source.buffer_alloc, 0,
denoiser->yv12_last_source.frame_size);
memset(denoiser->yv12_last_source.buffer_alloc, 0,
denoiser->yv12_last_source.frame_size);
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
vp8_denoiser_set_parameters(denoiser, mode);
denoiser->nmse_source_diff = 0;
denoiser->nmse_source_diff_count = 0;
@@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
// Bitrate thresholds and noise metric (nmse) thresholds for switching to
// aggressive mode.
// TODO(marpan): Adjust thresholds, including effect on resolution.
denoiser->bitrate_threshold = 200000; // (bits/sec).
denoiser->threshold_aggressive_mode = 35;
if (width * height > 640 * 480) {
denoiser->bitrate_threshold = 500000;
denoiser->threshold_aggressive_mode = 100;
denoiser->bitrate_threshold = 400000; // (bits/sec).
denoiser->threshold_aggressive_mode = 80;
if (width * height > 1280 * 720) {
denoiser->bitrate_threshold = 3000000;
denoiser->threshold_aggressive_mode = 200;
} else if (width * height > 960 * 540) {
denoiser->bitrate_threshold = 800000;
denoiser->threshold_aggressive_mode = 150;
} else if (width * height > 1280 * 720) {
denoiser->bitrate_threshold = 2000000;
denoiser->threshold_aggressive_mode = 1400;
denoiser->bitrate_threshold = 1200000;
denoiser->threshold_aggressive_mode = 120;
} else if (width * height > 640 * 480) {
denoiser->bitrate_threshold = 600000;
denoiser->threshold_aggressive_mode = 100;
}
return 0;
}
@@ -483,7 +483,6 @@ void vp8_denoiser_free(VP8_DENOISER *denoiser)
vpx_free(denoiser->denoise_state);
}
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MACROBLOCK *x,
unsigned int best_sse,
@@ -554,6 +553,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
* Note that any changes to the mode info only affects the
* denoising.
*/
x->denoise_zeromv = 1;
mbmi->ref_frame =
x->best_zeromv_reference_frame;
@@ -603,6 +603,12 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
motion_threshold = denoiser->denoise_pars.scale_motion_thresh *
NOISE_MOTION_THRESHOLD;
// If block is considered to be skin area, lower the motion threshold.
// In current version set threshold = 1, so only denoise very low
// (i.e., zero) mv on skin.
if (x->is_skin)
motion_threshold = 1;
if (motion_magnitude2 <
denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
x->increase_denoising = 1;
@@ -662,6 +668,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
/* No filtering of this block; it differs too much from the predictor,
* or the motion vector magnitude is considered too big.
*/
x->denoise_zeromv = 0;
vp8_copy_mem16x16(
x->thismb, 16,
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
@@ -692,7 +699,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
int uv_stride =denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
// Fix filter level to some nominal value for now.
int filter_level = 32;
int filter_level = 48;
int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
+3 -1
View File
@@ -19,7 +19,7 @@ extern "C" {
#endif
#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
#define SUM_DIFF_THRESHOLD_HIGH (600)
#define SUM_DIFF_THRESHOLD_HIGH (600) // ~(16 * 16 * 1.5)
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)
@@ -27,6 +27,8 @@ extern "C" {
#define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 8)
#define MOTION_MAGNITUDE_THRESHOLD_UV (8*3)
#define MAX_GF_ARF_DENOISE_RANGE (8)
enum vp8_denoiser_decision
{
COPY_BLOCK,
+18 -11
View File
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
@@ -82,6 +83,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
{
unsigned int act;
unsigned int sse;
(void)cpi;
/* TODO: This could also be done over smaller areas (8x8), but that would
* require extensive changes elsewhere, as lambda is assumed to be fixed
* over an entire MB in most of the code.
@@ -89,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
act = vp8_variance16x16(x->src.y_buffer,
act = vpx_variance16x16(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
act = act<<4;
@@ -154,8 +156,8 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
cpi->common.MBs));
/* Copy map to sort list */
vpx_memcpy( sortlist, cpi->mb_activity_map,
sizeof(unsigned int) * cpi->common.MBs );
memcpy( sortlist, cpi->mb_activity_map,
sizeof(unsigned int) * cpi->common.MBs );
/* Ripple each value down to its correct position */
@@ -522,7 +524,8 @@ void encode_mb_row(VP8_COMP *cpi,
}
#endif
// Keep track of how many (consecutive) times a block is coded
// Keep track of how many (consecutive) times a block is coded
// as ZEROMV_LASTREF, for base layer frames.
// Reset to 0 if its coded as anything else.
if (cpi->current_layer == 0) {
@@ -531,9 +534,14 @@ void encode_mb_row(VP8_COMP *cpi,
// Increment, check for wrap-around.
if (cpi->consec_zero_last[map_index+mb_col] < 255)
cpi->consec_zero_last[map_index+mb_col] += 1;
if (cpi->consec_zero_last_mvbias[map_index+mb_col] < 255)
cpi->consec_zero_last_mvbias[map_index+mb_col] += 1;
} else {
cpi->consec_zero_last[map_index+mb_col] = 0;
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
}
if (x->zero_last_dot_suppress)
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
}
/* Special case code for cyclic refresh
@@ -574,7 +582,7 @@ void encode_mb_row(VP8_COMP *cpi,
/* pack tokens for this MB */
{
int tok_count = *tp - tp_start;
pack_tokens(w, tp_start, tok_count);
vp8_pack_tokens(w, tp_start, tok_count);
}
#endif
/* Increment pointer into gf usage flags structure. */
@@ -658,8 +666,7 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)
x->mvc = cm->fc.mvc;
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
/* Special case treatment when GF and ARF are not sensible options
* for reference
@@ -737,7 +744,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
const int num_part = (1 << cm->multi_token_partition);
#endif
vpx_memset(segment_counts, 0, sizeof(segment_counts));
memset(segment_counts, 0, sizeof(segment_counts));
totalrate = 0;
if (cpi->compressor_speed == 2)
@@ -967,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
int i;
/* Set to defaults */
vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
@@ -1143,6 +1150,8 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
while (++b < 16);
}
#else
(void)cpi;
#endif
++x->ymode_count[m];
@@ -1252,7 +1261,6 @@ int vp8cx_encode_inter_macroblock
if(cpi->sf.use_fastquant_for_pick)
{
x->quantize_b = vp8_fast_quantize_b;
x->quantize_b_pair = vp8_fast_quantize_b_pair;
/* the fast quantizer does not use zbin_extra, so
* do not recalculate */
@@ -1265,7 +1273,6 @@ int vp8cx_encode_inter_macroblock
if (cpi->sf.improved_quant)
{
x->quantize_b = vp8_regular_quantize_b;
x->quantize_b_pair = vp8_regular_quantize_b_pair;
}
/* restore cpi->zbin_mode_boost_enabled */
+2 -1
View File
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
}
}
intra_pred_var = vp8_get_mb_ss(x->src_diff);
intra_pred_var = vpx_get_mb_ss(x->src_diff);
return intra_pred_var;
}
+6 -6
View File
@@ -506,8 +506,8 @@ static void optimize_mb(MACROBLOCK *x)
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -555,8 +555,8 @@ void vp8_optimize_mby(MACROBLOCK *x)
if (!x->e_mbd.left_context)
return;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -595,8 +595,8 @@ void vp8_optimize_mbuv(MACROBLOCK *x)
if (!x->e_mbd.left_context)
return;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
+19 -19
View File
@@ -19,8 +19,6 @@
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
static THREAD_FUNCTION thread_loopfilter(void *p_data)
{
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
@@ -215,11 +213,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
LAST_FRAME) {
// Increment, check for wrap-around.
if (cpi->consec_zero_last[map_index+mb_col] < 255)
cpi->consec_zero_last[map_index+mb_col] +=
1;
cpi->consec_zero_last[map_index+mb_col] += 1;
if (cpi->consec_zero_last_mvbias[map_index+mb_col] < 255)
cpi->consec_zero_last_mvbias[map_index+mb_col] += 1;
} else {
cpi->consec_zero_last[map_index+mb_col] = 0;
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
}
if (x->zero_last_dot_suppress)
cpi->consec_zero_last_mvbias[map_index+mb_col] = 0;
}
/* Special case code for cyclic refresh
@@ -261,7 +263,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
/* pack tokens for this MB */
{
int tok_count = tp - tp_start;
pack_tokens(w, tp_start, tok_count);
vp8_pack_tokens(w, tp_start, tok_count);
}
#else
cpi->tplist[mb_row].stop = tp;
@@ -346,7 +348,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->short_fdct8x4 = x->short_fdct8x4;
z->short_walsh4x4 = x->short_walsh4x4;
z->quantize_b = x->quantize_b;
z->quantize_b_pair = x->quantize_b_pair;
z->optimize = x->optimize;
/*
@@ -413,14 +414,13 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
zd->segmentation_enabled = xd->segmentation_enabled;
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
sizeof(xd->segment_feature_data));
memcpy(zd->segment_feature_data, xd->segment_feature_data,
sizeof(xd->segment_feature_data));
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
#if 1
/*TODO: Remove dequant from BLOCKD. This is a temporary solution until
@@ -435,15 +435,14 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
#endif
vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult,
sizeof(x->rd_thresh_mult));
memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult));
z->zbin_over_quant = x->zbin_over_quant;
z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
z->zbin_mode_boost = x->zbin_mode_boost;
vpx_memset(z->error_bins, 0, sizeof(z->error_bins));
memset(z->error_bins, 0, sizeof(z->error_bins));
}
}
@@ -469,7 +468,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
mb->gf_active_ptr = x->gf_active_ptr;
vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
mbr_ei[i].totalrate = 0;
mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
@@ -506,6 +505,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->intra_error = 0;
vp8_zero(mb->count_mb_ref_frame_usage);
mb->mbs_tested_so_far = 0;
mb->mbs_zero_last_dot_suppress = 0;
}
}
@@ -543,7 +543,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
vpx_malloc(sizeof(sem_t) * th_count));
CHECK_MEM_ERROR(cpi->mb_row_ei,
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
CHECK_MEM_ERROR(cpi->en_thread_data,
vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
+25 -24
View File
@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "block.h"
#include "onyx_int.h"
@@ -34,8 +35,6 @@
/* #define OUTPUT_FPF 1 */
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
@@ -132,6 +131,7 @@ static void output_stats(const VP8_COMP *cpi,
FIRSTPASS_STATS *stats)
{
struct vpx_codec_cx_pkt pkt;
(void)cpi;
pkt.kind = VPX_CODEC_STATS_PKT;
pkt.data.twopass_stats.buf = stats;
pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
@@ -418,18 +418,19 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
int raw_stride = raw_buffer->y_stride;
unsigned char *ref_ptr;
int ref_stride = x->e_mbd.pre.y_stride;
(void)cpi;
/* Set up pointers for this macro block raw buffer */
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
+ d->offset);
vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
(unsigned int *)(raw_motion_err));
vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
(unsigned int *)(raw_motion_err));
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
(unsigned int *)(best_motion_err));
vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
(unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -453,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
/* override the default variance function to use MSE */
v_fn_ptr.vf = vp8_mse16x16;
v_fn_ptr.vf = vpx_mse16x16;
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
@@ -571,7 +572,7 @@ void vp8_first_pass(VP8_COMP *cpi)
{
int flag[2] = {1, 1};
vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
}
@@ -1327,8 +1328,6 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
{
FIRSTPASS_STATS this_frame;
@@ -1409,6 +1408,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
void vp8_end_second_pass(VP8_COMP *cpi)
{
(void)cpi;
}
/* This function gives and estimate of how badly we believe the prediction
@@ -1419,6 +1419,7 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra
double prediction_decay_rate;
double motion_decay;
double motion_pct = next_frame->pcnt_motion;
(void)cpi;
/* Initial basis is the % mbs inter coded */
prediction_decay_rate = next_frame->pcnt_inter;
@@ -1547,6 +1548,7 @@ static void accumulate_frame_motion_stats(
double this_frame_mvr_ratio;
double this_frame_mvc_ratio;
double motion_pct;
(void)cpi;
/* Accumulate motion stats. */
motion_pct = this_frame->pcnt_motion;
@@ -1774,7 +1776,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
start_pos = cpi->twopass.stats_in;
vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
/* Load stats for the current frame. */
mod_frame_err = calculate_modified_err(cpi, this_frame);
@@ -1870,7 +1872,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
break;
}
vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame));
memcpy(this_frame, &next_frame, sizeof(*this_frame));
old_boost_score = boost_score;
}
@@ -2440,7 +2442,7 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->twopass.frames_to_key == 0)
{
/* Define next KF group and assign bits to it */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
find_next_key_frame(cpi, &this_frame_copy);
/* Special case: Error error_resilient_mode mode does not make much
@@ -2466,7 +2468,7 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->frames_till_gf_update_due == 0)
{
/* Define next gf group and assign bits to it */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
define_gf_group(cpi, &this_frame_copy);
/* If we are going to code an altref frame at the end of the group
@@ -2482,7 +2484,7 @@ void vp8_second_pass(VP8_COMP *cpi)
* to the GF group
*/
int bak = cpi->per_frame_bandwidth;
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
cpi->per_frame_bandwidth = bak;
}
@@ -2505,14 +2507,14 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->common.frame_type != KEY_FRAME)
{
/* Assign bits from those allocated to the GF group */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
}
}
else
{
/* Assign bits from those allocated to the GF group */
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
}
}
@@ -2653,7 +2655,7 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
double decay_accumulator = 1.0;
double next_iiratio;
vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
/* Note the starting file position so we can reset to it */
start_pos = cpi->twopass.stats_in;
@@ -2730,7 +2732,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double kf_group_coded_err = 0.0;
double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
vpx_memset(&next_frame, 0, sizeof(next_frame));
memset(&next_frame, 0, sizeof(next_frame));
vp8_clear_system_state();
start_position = cpi->twopass.stats_in;
@@ -2751,7 +2753,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.frames_to_key = 1;
/* Take a copy of the initial frame details */
vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
memcpy(&first_frame, this_frame, sizeof(*this_frame));
cpi->twopass.kf_group_bits = 0;
cpi->twopass.kf_group_error_left = 0;
@@ -2774,7 +2776,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
kf_group_coded_err += this_frame->coded_error;
/* Load the next frame's stats. */
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
memcpy(&last_frame, this_frame, sizeof(*this_frame));
input_stats(cpi, this_frame);
/* Provided that we are not at the end of the file... */
@@ -2842,7 +2844,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.frames_to_key /= 2;
/* Copy first frame details */
vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
/* Reset to the start of the group */
reset_fpf_position(cpi, start_position);
@@ -2964,7 +2966,6 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
*/
decay_accumulator = 1.0;
boost_score = 0.0;
loop_decay_rate = 1.00; /* Starting decay rate */
for (i = 0 ; i < cpi->twopass.frames_to_key ; i++)
{
@@ -3208,7 +3209,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int new_width = cpi->oxcf.Width;
int new_height = cpi->oxcf.Height;
int projected_buffer_level = (int)cpi->buffer_level;
int projected_buffer_level;
int tmp_q;
double projected_bits_perframe;
+31 -29
View File
@@ -9,6 +9,8 @@
*/
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
@@ -888,6 +890,8 @@ int vp8_hex_search
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
(void)mvcost;
/* adjust ref_mv to make sure it is within MV range */
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
br = ref_mv->as_mv.row;
@@ -898,7 +902,7 @@ int vp8_hex_search
this_offset = base_offset + (br * (pre_stride)) + bc;
this_mv.as_mv.row = br;
this_mv.as_mv.col = bc;
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
#if CONFIG_MULTI_RES_ENCODING
@@ -911,6 +915,8 @@ int vp8_hex_search
else if (search_param >= 1) hex_range = 63;
dia_range = 8;
#else
(void)search_param;
#endif
/* hex search */
@@ -923,7 +929,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + hex[i].row;
this_mv.as_mv.col = bc + hex[i].col;
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -934,7 +940,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + hex[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -960,7 +966,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + next_chkpts[k][i].row;
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -971,7 +977,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1002,7 +1008,7 @@ cal_neighbors:
this_mv.as_mv.row = br + neighbors[i].row;
this_mv.as_mv.col = bc + neighbors[i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -1013,7 +1019,7 @@ cal_neighbors:
this_mv.as_mv.col = bc + neighbors[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1097,7 +1103,7 @@ int vp8_diamond_search_sad_c
best_address = in_what;
/* Check the starting position */
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence
@@ -1122,7 +1128,7 @@ int vp8_diamond_search_sad_c
{
check_here = ss[i].offset + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1221,7 +1227,7 @@ int vp8_diamond_search_sadx4
best_address = in_what;
/* Check the starting position */
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence the
@@ -1289,7 +1295,7 @@ int vp8_diamond_search_sadx4
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = ss[i].offset + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1372,8 +1378,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that
@@ -1398,7 +1403,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
for (c = col_min; c < col_max; c++)
{
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
@@ -1470,8 +1475,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that stretch
@@ -1527,7 +1531,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
while (c < col_max)
{
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1586,7 +1590,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
// TODO(johannkoenig): check if this alignment is necessary.
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
unsigned int sad_array[3];
int *mvsadcost[2];
@@ -1605,8 +1610,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that stretch
@@ -1692,7 +1696,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
while (c < col_max)
{
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1750,8 +1754,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
bestsad = fn_ptr->sdf(what, what_stride, best_address,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1767,7 +1770,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1830,8 +1833,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
bestsad = fn_ptr->sdf(what, what_stride, best_address,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1882,7 +1884,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1974,8 +1976,8 @@ void print_mode_context(void)
#ifdef VP8_ENTROPY_STATS
void init_mv_ref_counts()
{
vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
}
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
+1
View File
@@ -10,6 +10,7 @@
#include "vp8/common/blockd.h"
#include "modecosts.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "vp8/common/entropymode.h"
+3 -1
View File
@@ -16,7 +16,9 @@
extern "C" {
#endif
void vp8_init_mode_costs(VP8_COMP *x);
struct VP8_COMP;
void vp8_init_mode_costs(struct VP8_COMP *x);
#ifdef __cplusplus
} // extern "C"
+380 -131
View File
@@ -11,6 +11,8 @@
#include "vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
@@ -427,10 +429,10 @@ static void setup_features(VP8_COMP *cpi)
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
set_default_lf_deltas(cpi);
@@ -507,7 +509,7 @@ static void disable_segmentation(VP8_COMP *cpi)
static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
{
/* Copy in the new segmentation map */
vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
/* Signal that the map should be updated. */
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
@@ -529,7 +531,7 @@ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta)
{
cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta;
vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
}
@@ -579,11 +581,32 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
cpi->cyclic_refresh_q = Q / 2;
if (cpi->oxcf.screen_content_mode) {
// Modify quality ramp-up based on Q. Above some Q level, increase the
// number of blocks to be refreshed, and reduce it below the thredhold.
// Turn-off under certain conditions (i.e., away from key frame, and if
// we are at good quality (low Q) and most of the blocks were skipped-encoded
// in previous frame.
int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 80 : 100;
if (Q >= qp_thresh) {
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 10;
} else if (cpi->frames_since_key > 250 &&
Q < 20 &&
cpi->mb.skip_true_count > (int)(0.95 * mbs_in_frame)) {
cpi->cyclic_refresh_mode_max_mbs_perframe = 0;
} else {
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 20;
}
block_count = cpi->cyclic_refresh_mode_max_mbs_perframe;
}
// Set every macroblock to be eligible for update.
// For key frame this will reset seg map to 0.
vpx_memset(cpi->segmentation_map, 0, mbs_in_frame);
memset(cpi->segmentation_map, 0, mbs_in_frame);
if (cpi->common.frame_type != KEY_FRAME)
if (cpi->common.frame_type != KEY_FRAME && block_count > 0)
{
/* Cycle through the macro_block rows */
/* MB loop to set local segmentation map */
@@ -617,15 +640,18 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive &&
Q < (int)cpi->denoiser.denoise_pars.qp_thresh) {
Q < (int)cpi->denoiser.denoise_pars.qp_thresh &&
(cpi->frames_since_key >
2 * cpi->denoiser.denoise_pars.consec_zerolast)) {
// Under aggressive denoising, use segmentation to turn off loop
// filter below some qp thresh. The filter is turned off for all
// filter below some qp thresh. The filter is reduced for all
// blocks that have been encoded as ZEROMV LAST x frames in a row,
// where x is set by cpi->denoiser.denoise_pars.consec_zerolast.
// This is to avoid "dot" artifacts that can occur from repeated
// loop filtering on noisy input source.
cpi->cyclic_refresh_q = Q;
lf_adjustment = -MAX_LOOP_FILTER;
// lf_adjustment = -MAX_LOOP_FILTER;
lf_adjustment = -40;
for (i = 0; i < mbs_in_frame; ++i) {
seg_map[i] = (cpi->consec_zero_last[i] >
cpi->denoiser.denoise_pars.consec_zerolast) ? 1 : 0;
@@ -662,8 +688,8 @@ static void set_default_lf_deltas(VP8_COMP *cpi)
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
/* Test of ref frame deltas */
cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2;
@@ -786,6 +812,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
}
cpi->mb.mbs_tested_so_far = 0;
cpi->mb.mbs_zero_last_dot_suppress = 0;
/* best quality defaults */
sf->RD = 1;
@@ -853,6 +880,25 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLIT2] =
sf->thresh_mult[THR_SPLIT3] = speed_map(Speed, thresh_mult_map_split2);
// Special case for temporal layers.
// Reduce the thresholds for zero/nearest/near for GOLDEN, if GOLDEN is
// used as second reference. We don't modify thresholds for ALTREF case
// since ALTREF is usually used as long-term reference in temporal layers.
if ((cpi->Speed <= 6) &&
(cpi->oxcf.number_of_layers > 1) &&
(cpi->ref_frame_flags & VP8_LAST_FRAME) &&
(cpi->ref_frame_flags & VP8_GOLD_FRAME)) {
if (cpi->closest_reference_frame == GOLDEN_FRAME) {
sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 3;
sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 3;
sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 3;
} else {
sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 1;
sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 1;
sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 1;
}
}
cpi->mode_check_freq[THR_ZERO1] =
cpi->mode_check_freq[THR_NEAREST1] =
cpi->mode_check_freq[THR_NEAR1] =
@@ -1043,7 +1089,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (Speed >= 15)
sf->half_pixel_search = 0;
vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
}; /* switch */
@@ -1083,12 +1129,10 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (cpi->sf.improved_quant)
{
cpi->mb.quantize_b = vp8_regular_quantize_b;
cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
}
else
{
cpi->mb.quantize_b = vp8_fast_quantize_b;
cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair;
}
if (cpi->sf.improved_quant != last_improved_quant)
vp8cx_init_quantizer(cpi);
@@ -1256,7 +1300,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
CHECK_MEM_ERROR(cpi->active_map,
vpx_calloc(cm->mb_rows * cm->mb_cols,
sizeof(*cpi->active_map)));
vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
#if CONFIG_MULTITHREAD
if (width < 640)
@@ -1363,20 +1407,31 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cm->version = oxcf->Version;
vp8_setup_version(cm);
/* frame rate is not available on the first frame, as it's derived from
/* Frame rate is not available on the first frame, as it's derived from
* the observed timestamps. The actual value used here doesn't matter
* too much, as it will adapt quickly. If the reciprocal of the timebase
* seems like a reasonable framerate, then use that as a guess, otherwise
* use 30.
* too much, as it will adapt quickly.
*/
cpi->framerate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
if (oxcf->timebase.num > 0) {
cpi->framerate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
} else {
cpi->framerate = 30;
}
/* If the reciprocal of the timebase seems like a reasonable framerate,
* then use that as a guess, otherwise use 30.
*/
if (cpi->framerate > 180)
cpi->framerate = 30;
cpi->ref_framerate = cpi->framerate;
cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 1;
cm->refresh_entropy_probs = 1;
/* change includes all joint functionality */
vp8_change_config(cpi, oxcf);
@@ -1597,12 +1652,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->baseline_gf_interval =
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 1;
cm->refresh_entropy_probs = 1;
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
cpi->oxcf.token_partitions = 3;
#endif
@@ -1705,13 +1754,25 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (cpi->oxcf.number_of_layers != prev_number_of_layers)
{
// If the number of temporal layers are changed we must start at the
// base of the pattern cycle, so reset temporal_pattern_counter.
// base of the pattern cycle, so set the layer id to 0 and reset
// the temporal pattern counter.
if (cpi->temporal_layer_id > 0) {
cpi->temporal_layer_id = 0;
}
cpi->temporal_pattern_counter = 0;
reset_temporal_layer_change(cpi, oxcf, prev_number_of_layers);
}
if (!cpi->initial_width)
{
cpi->initial_width = cpi->oxcf.Width;
cpi->initial_height = cpi->oxcf.Height;
}
cm->Width = cpi->oxcf.Width;
cm->Height = cpi->oxcf.Height;
assert(cm->Width <= cpi->initial_width);
assert(cm->Height <= cpi->initial_height);
/* TODO(jkoleszar): if an internal spatial resampling is active,
* and we downsize the input image, maybe we should clear the
@@ -1832,7 +1893,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cm = &cpi->common;
vpx_memset(cpi, 0, sizeof(VP8_COMP));
memset(cpi, 0, sizeof(VP8_COMP));
if (setjmp(cm->error.jmp))
{
@@ -1852,6 +1913,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
cpi->common.current_video_frame = 0;
cpi->temporal_pattern_counter = 0;
cpi->temporal_layer_id = -1;
cpi->kf_overspend_bits = 0;
cpi->kf_bitrate_adjustment = 0;
cpi->frames_till_gf_update_due = 0;
@@ -1904,6 +1966,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
}
#endif
cpi->mse_source_denoised = 0;
/* Should we use the cyclic refresh method.
* Currently this is tied to error resilliant mode
*/
@@ -1927,7 +1991,9 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->cyclic_refresh_map = (signed char *) NULL;
CHECK_MEM_ERROR(cpi->consec_zero_last,
vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
vpx_calloc(cm->mb_rows * cm->mb_cols, 1));
CHECK_MEM_ERROR(cpi->consec_zero_last_mvbias,
vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
#ifdef VP8_ENTROPY_STATS
init_context_counters();
@@ -1946,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->source_alt_ref_active = 0;
cpi->common.refresh_alt_ref_frame = 0;
cpi->force_maxqp = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
@@ -2062,55 +2130,55 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
}
#endif
cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16;
cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
cpi->fn_ptr[BLOCK_16X16].sdx3f = vp8_sad16x16x3;
cpi->fn_ptr[BLOCK_16X16].sdx8f = vp8_sad16x16x8;
cpi->fn_ptr[BLOCK_16X16].sdx4df = vp8_sad16x16x4d;
cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3;
cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8;
cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
cpi->fn_ptr[BLOCK_16X8].sdf = vp8_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8;
cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_16X8].sdx3f = vp8_sad16x8x3;
cpi->fn_ptr[BLOCK_16X8].sdx8f = vp8_sad16x8x8;
cpi->fn_ptr[BLOCK_16X8].sdx4df = vp8_sad16x8x4d;
cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3;
cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8;
cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
cpi->fn_ptr[BLOCK_8X16].sdf = vp8_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16;
cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X16].sdx3f = vp8_sad8x16x3;
cpi->fn_ptr[BLOCK_8X16].sdx8f = vp8_sad8x16x8;
cpi->fn_ptr[BLOCK_8X16].sdx4df = vp8_sad8x16x4d;
cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3;
cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8;
cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
cpi->fn_ptr[BLOCK_8X8].sdf = vp8_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8;
cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X8].sdx3f = vp8_sad8x8x3;
cpi->fn_ptr[BLOCK_8X8].sdx8f = vp8_sad8x8x8;
cpi->fn_ptr[BLOCK_8X8].sdx4df = vp8_sad8x8x4d;
cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3;
cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8;
cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
cpi->fn_ptr[BLOCK_4X4].sdf = vp8_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4;
cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_4X4].sdx3f = vp8_sad4x4x3;
cpi->fn_ptr[BLOCK_4X4].sdx8f = vp8_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vp8_sad4x4x4d;
cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3;
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
#if ARCH_X86 || ARCH_X86_64
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
@@ -2206,9 +2274,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
if (cpi->b_calculate_psnr)
{
YV12_BUFFER_CONFIG *lst_yv12 =
&cpi->common.yv12_fb[cpi->common.lst_fb_idx];
if (cpi->oxcf.number_of_layers > 1)
{
int i;
@@ -2220,7 +2285,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
double dr = (double)cpi->bytes_in_layer[i] *
8.0 / 1000.0 / time_encoded;
double samples = 3.0 / 2 * cpi->frames_in_layer[i] *
lst_yv12->y_width * lst_yv12->y_height;
cpi->common.Width * cpi->common.Height;
double total_psnr =
vpx_sse_to_psnr(samples, 255.0,
cpi->total_error2[i]);
@@ -2242,7 +2307,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
else
{
double samples = 3.0 / 2 * cpi->count *
lst_yv12->y_width * lst_yv12->y_height;
cpi->common.Width * cpi->common.Height;
double total_psnr = vpx_sse_to_psnr(samples, 255.0,
cpi->total_sq_error);
double total_psnr2 = vpx_sse_to_psnr(samples, 255.0,
@@ -2450,6 +2515,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
vpx_free(cpi->tok);
vpx_free(cpi->cyclic_refresh_map);
vpx_free(cpi->consec_zero_last);
vpx_free(cpi->consec_zero_last_mvbias);
vp8_remove_common(&cpi->common);
vpx_free(cpi);
@@ -2492,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
{
unsigned int sse;
vp8_mse16x16(orig + col, orig_stride,
vpx_mse16x16(orig + col, orig_stride,
recon + col, recon_stride,
&sse);
total_sse += sse;
@@ -2805,7 +2871,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
}
/* Update data structure that monitors level of reference to last GF */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
/* this frame refreshes means next frames don't unless specified by user */
@@ -2854,7 +2920,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
}
/* Update data structure that monitors level of reference to last GF */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
/* this frame refreshes means next frames don't unless specified by
@@ -3293,6 +3359,49 @@ static void update_reference_frames(VP8_COMP *cpi)
}
static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
VP8_COMP *cpi)
{
int i, j;
int Total = 0;
int num_blocks = 0;
int skip = 2;
int min_consec_zero_last = 10;
int tot_num_blocks = (source->y_height * source->y_width) >> 8;
unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer;
/* Loop through the Y plane, every |skip| blocks along rows and colmumns,
* summing the square differences, and only for blocks that have been
* zero_last mode at least |x| frames in a row.
*/
for (i = 0; i < source->y_height; i += 16 * skip)
{
int block_index_row = (i >> 4) * cpi->common.mb_cols;
for (j = 0; j < source->y_width; j += 16 * skip)
{
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
Total += vpx_mse16x16(src + j,
source->y_stride,
dst + j, dest->y_stride,
&sse);
num_blocks++;
}
}
src += 16 * skip * source->y_stride;
dst += 16 * skip * dest->y_stride;
}
// Only return non-zero if we have at least ~1/16 samples for estimate.
if (num_blocks > (tot_num_blocks >> 4)) {
return (Total / num_blocks);
} else {
return 0;
}
}
#if CONFIG_TEMPORAL_DENOISING
static void process_denoiser_mode_change(VP8_COMP *cpi) {
const VP8_COMMON *const cm = &cpi->common;
@@ -3305,12 +3414,12 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
// Only select blocks for computing nmse that have been encoded
// as ZERO LAST min_consec_zero_last frames in a row.
// Scale with number of temporal layers.
int min_consec_zero_last = 8 / cpi->oxcf.number_of_layers;
int min_consec_zero_last = 12 / cpi->oxcf.number_of_layers;
// Decision is tested for changing the denoising mode every
// num_mode_change times this function is called. Note that this
// function called every 8 frames, so (8 * num_mode_change) is number
// of frames where denoising mode change is tested for switch.
int num_mode_change = 15;
int num_mode_change = 20;
// Framerate factor, to compensate for larger mse at lower framerates.
// Use ref_framerate, which is full source framerate for temporal layers.
// TODO(marpan): Adjust this factor.
@@ -3322,7 +3431,12 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
static const unsigned char const_source[16] = {
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128};
int bandwidth = (int)(cpi->target_bandwidth);
// For temporal layers, use full bandwidth (top layer).
if (cpi->oxcf.number_of_layers > 1) {
LAYER_CONTEXT *lc = &cpi->layer_context[cpi->oxcf.number_of_layers - 1];
bandwidth = (int)(lc->target_bandwidth);
}
// Loop through the Y plane, every skip blocks along rows and columns,
// summing the normalized mean square error, only for blocks that have
// been encoded as ZEROMV LAST at least min_consec_zero_last least frames in
@@ -3334,12 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
const unsigned int mse = vp8_mse16x16(src + j,
ystride,
dst + j,
ystride,
&sse);
const unsigned int var = vp8_variance16x16(src + j,
const unsigned int var = vpx_variance16x16(src + j,
ystride,
dst + j,
ystride,
@@ -3347,14 +3456,15 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
// Only consider this block as valid for noise measurement
// if the sum_diff average of the current and previous frame
// is small (to avoid effects from lighting change).
if ((mse - var) < 256) {
const unsigned int act = vp8_variance16x16(src + j,
if ((sse - var) < 128) {
unsigned int sse2;
const unsigned int act = vpx_variance16x16(src + j,
ystride,
const_source,
0,
&sse);
&sse2);
if (act > 0)
total += mse / act;
total += sse / act;
num_blocks++;
}
}
@@ -3370,16 +3480,17 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
if (total > 0 &&
(num_blocks > (tot_num_blocks >> 4))) {
// Update the recursive mean square source_diff.
total = (total << 8) / num_blocks;
if (cpi->denoiser.nmse_source_diff_count == 0) {
// First sample in new interval.
cpi->denoiser.nmse_source_diff = total;
cpi->denoiser.qp_avg = cm->base_qindex;
} else {
// For subsequent samples, use average with weight ~1/4 for new sample.
cpi->denoiser.nmse_source_diff = (int)((total >> 2) +
3 * (cpi->denoiser.nmse_source_diff >> 2));
cpi->denoiser.qp_avg = (int)((cm->base_qindex >> 2) +
3 * (cpi->denoiser.qp_avg >> 2));
cpi->denoiser.nmse_source_diff = (int)((total +
3 * cpi->denoiser.nmse_source_diff) >> 2);
cpi->denoiser.qp_avg = (int)((cm->base_qindex +
3 * cpi->denoiser.qp_avg) >> 2);
}
cpi->denoiser.nmse_source_diff_count++;
}
@@ -3391,7 +3502,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
(cpi->denoiser.nmse_source_diff >
cpi->denoiser.threshold_aggressive_mode) &&
(cpi->denoiser.qp_avg < cpi->denoiser.qp_threshold_up &&
cpi->target_bandwidth > cpi->denoiser.bitrate_threshold)) {
bandwidth > cpi->denoiser.bitrate_threshold)) {
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive);
} else {
// Check for going down: from aggressive to normal mode.
@@ -3400,7 +3511,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
cpi->denoiser.threshold_aggressive_mode)) ||
((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) &&
(cpi->denoiser.qp_avg > cpi->denoiser.qp_threshold_down ||
cpi->target_bandwidth < cpi->denoiser.bitrate_threshold))) {
bandwidth < cpi->denoiser.bitrate_threshold))) {
vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV);
}
}
@@ -3416,6 +3527,13 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
{
const FRAME_TYPE frame_type = cm->frame_type;
int update_any_ref_buffers = 1;
if (cpi->common.refresh_last_frame == 0 &&
cpi->common.refresh_golden_frame == 0 &&
cpi->common.refresh_alt_ref_frame == 0) {
update_any_ref_buffers = 0;
}
if (cm->no_lpf)
{
cm->filter_level = 0;
@@ -3427,11 +3545,36 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
vp8_clear_system_state();
vpx_usec_timer_start(&timer);
if (cpi->sf.auto_filter == 0)
if (cpi->sf.auto_filter == 0) {
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) {
// Use the denoised buffer for selecting base loop filter level.
// Denoised signal for current frame is stored in INTRA_FRAME.
// No denoising on key frames.
vp8cx_pick_filter_level_fast(
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi);
} else {
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
}
#else
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
else
#endif
} else {
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) {
// Use the denoised buffer for selecting base loop filter level.
// Denoised signal for current frame is stored in INTRA_FRAME.
// No denoising on key frames.
vp8cx_pick_filter_level(
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi);
} else {
vp8cx_pick_filter_level(cpi->Source, cpi);
}
#else
vp8cx_pick_filter_level(cpi->Source, cpi);
#endif
}
if (cm->filter_level > 0)
{
@@ -3447,7 +3590,9 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
#endif
if (cm->filter_level > 0)
// No need to apply loop-filter if the encoded frame does not update
// any reference buffers.
if (cm->filter_level > 0 && update_any_ref_buffers)
{
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type);
}
@@ -3577,39 +3722,78 @@ static void encode_frame_to_data_rate
}
#if CONFIG_MULTI_RES_ENCODING
/* In multi-resolution encoding, frame_type is decided by lowest-resolution
* encoder. Same frame_type is adopted while encoding at other resolution.
*/
if (cpi->oxcf.mr_encoder_id)
{
LOWER_RES_FRAME_INFO* low_res_frame_info
= (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
if (cpi->oxcf.mr_total_resolutions > 1) {
LOWER_RES_FRAME_INFO* low_res_frame_info
= (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info;
if (cpi->oxcf.mr_encoder_id) {
// TODO(marpan): This constraint shouldn't be needed, as we would like
// to allow for key frame setting (forced or periodic) defined per
// spatial layer. For now, keep this in.
cm->frame_type = low_res_frame_info->frame_type;
// Check if lower resolution is available for motion vector reuse.
if(cm->frame_type != KEY_FRAME)
{
cpi->mr_low_res_mv_avail = 1;
cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped);
cpi->mr_low_res_mv_avail = 1;
cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped);
if (cpi->ref_frame_flags & VP8_LAST_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME]
== low_res_frame_info->low_res_ref_frames[LAST_FRAME]);
if (cpi->ref_frame_flags & VP8_LAST_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME]
== low_res_frame_info->low_res_ref_frames[LAST_FRAME]);
if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME]
== low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]);
if (cpi->ref_frame_flags & VP8_GOLD_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME]
== low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]);
if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME]
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
// Don't use altref to determine whether low res is available.
// TODO (marpan): Should we make this type of condition on a
// per-reference frame basis?
/*
if (cpi->ref_frame_flags & VP8_ALTR_FRAME)
cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME]
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
*/
}
}
// On a key frame: For the lowest resolution, keep track of the key frame
// counter value. For the higher resolutions, reset the current video
// frame counter to that of the lowest resolution.
// This is done to the handle the case where we may stop/start encoding
// higher layer(s). The restart-encoding of higher layer is only signaled
// by a key frame for now.
// TODO (marpan): Add flag to indicate restart-encoding of higher layer.
if (cm->frame_type == KEY_FRAME) {
if (cpi->oxcf.mr_encoder_id) {
// If the initial starting value of the buffer level is zero (this can
// happen because we may have not started encoding this higher stream),
// then reset it to non-zero value based on |starting_buffer_level|.
if (cpi->common.current_video_frame == 0 && cpi->buffer_level == 0) {
unsigned int i;
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
for (i = 0; i < cpi->oxcf.number_of_layers; i++) {
LAYER_CONTEXT *lc = &cpi->layer_context[i];
lc->bits_off_target = lc->starting_buffer_level;
lc->buffer_level = lc->starting_buffer_level;
}
}
cpi->common.current_video_frame =
low_res_frame_info->key_frame_counter_value;
} else {
low_res_frame_info->key_frame_counter_value =
cpi->common.current_video_frame;
}
}
}
#endif
// Find the reference frame closest to the current frame.
cpi->closest_reference_frame = LAST_FRAME;
if (cm->frame_type != KEY_FRAME) {
if(cm->frame_type != KEY_FRAME) {
int i;
MV_REFERENCE_FRAME closest_ref = INTRA_FRAME;
if (cpi->ref_frame_flags & VP8_LAST_FRAME) {
@@ -3619,12 +3803,12 @@ static void encode_frame_to_data_rate
} else if (cpi->ref_frame_flags & VP8_ALTR_FRAME) {
closest_ref = ALTREF_FRAME;
}
for (i = 1; i <= 3; i++) {
for(i = 1; i <= 3; i++) {
vpx_ref_frame_type_t ref_frame_type = (vpx_ref_frame_type_t)
((i == 3) ? 4 : i);
if (cpi->ref_frame_flags & ref_frame_type) {
if ((cm->current_video_frame - cpi->current_ref_frames[i]) <
(cm->current_video_frame - cpi->current_ref_frames[closest_ref])) {
(cm->current_video_frame - cpi->current_ref_frames[closest_ref])) {
closest_ref = i;
}
}
@@ -3650,7 +3834,9 @@ static void encode_frame_to_data_rate
}
// Reset the zero_last counter to 0 on key frame.
vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
memset(cpi->consec_zero_last_mvbias, 0,
(cpi->common.mb_rows * cpi->common.mb_cols));
}
#if 0
@@ -4001,7 +4187,10 @@ static void encode_frame_to_data_rate
*/
if (cpi->cyclic_refresh_mode_enabled)
{
if (cpi->current_layer==0)
// Special case for screen_content_mode with golden frame updates.
int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 &&
cm->refresh_golden_frame);
if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf)
cyclic_background_refresh(cpi, Q, 0);
else
disable_segmentation(cpi);
@@ -4179,8 +4368,10 @@ static void encode_frame_to_data_rate
else
disable_segmentation(cpi);
}
// Reset the consec_zero_last counter on key frame.
vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
// Reset the zero_last counter to 0 on key frame.
memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
memset(cpi->consec_zero_last_mvbias, 0,
(cpi->common.mb_rows * cpi->common.mb_cols));
vp8_set_quantizer(cpi, Q);
}
@@ -4203,7 +4394,7 @@ static void encode_frame_to_data_rate
if (cm->refresh_entropy_probs == 0)
{
/* save a copy for later refresh */
vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
}
vp8_update_coef_context(cpi);
@@ -4221,6 +4412,11 @@ static void encode_frame_to_data_rate
/* transform / motion compensation build reconstruction frame */
vp8_encode_frame(cpi);
if (cpi->oxcf.screen_content_mode == 2) {
if (vp8_drop_encodedframe_overshoot(cpi, Q))
return;
}
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
#endif
@@ -4613,6 +4809,22 @@ static void encode_frame_to_data_rate
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
#if CONFIG_TEMPORAL_DENOISING
// Get some measure of the amount of noise, by measuring the (partial) mse
// between source and denoised buffer, for y channel. Partial refers to
// computing the sse for a sub-sample of the frame (i.e., skip x blocks along row/column),
// and only for blocks in that set that are consecutive ZEROMV_LAST mode.
// Do this every ~8 frames, to further reduce complexity.
// TODO(marpan): Keep this for now for the case cpi->oxcf.noise_sensitivity < 4,
// should be removed in favor of the process_denoiser_mode_change() function below.
if (cpi->oxcf.noise_sensitivity > 0 &&
cpi->oxcf.noise_sensitivity < 4 &&
!cpi->oxcf.screen_content_mode &&
cpi->frames_since_key%8 == 0 &&
cm->frame_type != KEY_FRAME) {
cpi->mse_source_denoised = measure_square_diff_partial(
&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi->Source, cpi);
}
// For the adaptive denoising mode (noise_sensitivity == 4), sample the mse
// of source diff (between current and previous frame), and determine if we
// should switch the denoiser mode. Sampling refers to computing the mse for
@@ -4621,6 +4833,7 @@ static void encode_frame_to_data_rate
// constraint on the sum diff between blocks. This process is called every
// ~8 frames, to further reduce complexity.
if (cpi->oxcf.noise_sensitivity == 4 &&
!cpi->oxcf.screen_content_mode &&
cpi->frames_since_key % 8 == 0 &&
cm->frame_type != KEY_FRAME) {
process_denoiser_mode_change(cpi);
@@ -4758,6 +4971,13 @@ static void encode_frame_to_data_rate
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
// If the frame dropper is not enabled, don't let the buffer level go below
// some threshold, given here by -|maximum_buffer_size|. For now we only do
// this for screen content input.
if (cpi->drop_frames_allowed == 0 && cpi->oxcf.screen_content_mode &&
cpi->bits_off_target < -cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = -cpi->oxcf.maximum_buffer_size;
/* Rolling monitors of whether we are over or underspending used to
* help regulate min and Max Q in two pass.
*/
@@ -5232,7 +5452,26 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->ref_framerate = 10000000.0 / avg_duration;
}
#if CONFIG_MULTI_RES_ENCODING
if (cpi->oxcf.mr_total_resolutions > 1) {
LOWER_RES_FRAME_INFO* low_res_frame_info = (LOWER_RES_FRAME_INFO*)
cpi->oxcf.mr_low_res_mode_info;
// Frame rate should be the same for all spatial layers in
// multi-res-encoding (simulcast), so we constrain the frame for
// higher layers to be that of lowest resolution. This is needed
// as he application may decide to skip encoding a high layer and
// then start again, in which case a big jump in time-stamps will
// be received for that high layer, which will yield an incorrect
// frame rate (from time-stamp adjustment in above calculation).
if (cpi->oxcf.mr_encoder_id) {
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
}
else {
// Keep track of frame rate for lowest resolution.
low_res_frame_info->low_res_framerate = cpi->ref_framerate;
}
}
#endif
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
@@ -5262,8 +5501,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
update_layer_contexts (cpi);
/* Restore layer specific context & set frame rate */
layer = cpi->oxcf.layer_id[
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
if (cpi->temporal_layer_id >= 0) {
layer = cpi->temporal_layer_id;
} else {
layer = cpi->oxcf.layer_id[
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
}
restore_layer_context (cpi, layer);
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
}
@@ -5382,19 +5625,19 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cm->refresh_entropy_probs == 0)
{
vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
}
/* Save the contexts separately for alt ref, gold and last. */
/* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */
if(cm->refresh_alt_ref_frame)
vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
if(cm->refresh_golden_frame)
vpx_memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
if(cm->refresh_last_frame)
vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
/* if its a dropped frame honor the requests on subsequent frames */
if (*size > 0)
@@ -5439,19 +5682,23 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
double frame_psnr;
YV12_BUFFER_CONFIG *orig = cpi->Source;
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
int y_samples = orig->y_height * orig->y_width ;
int uv_samples = orig->uv_height * orig->uv_width ;
unsigned int y_width = cpi->common.Width;
unsigned int y_height = cpi->common.Height;
unsigned int uv_width = (y_width + 1) / 2;
unsigned int uv_height = (y_height + 1) / 2;
int y_samples = y_height * y_width;
int uv_samples = uv_height * uv_width;
int t_samples = y_samples + 2 * uv_samples;
double sq_error;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
recon->y_buffer, recon->y_stride, y_width, y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
recon->u_buffer, recon->uv_stride, uv_width, uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height);
recon->v_buffer, recon->uv_stride, uv_width, uv_height);
sq_error = (double)(ye + ue + ve);
@@ -5473,13 +5720,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
vp8_clear_system_state();
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height);
pp->y_buffer, pp->y_stride, y_width, y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
pp->u_buffer, pp->uv_stride, uv_width, uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height);
pp->v_buffer, pp->uv_stride, uv_width, uv_height);
sq_error2 = (double)(ye + ue + ve);
@@ -5606,6 +5853,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
cpi->common.show_frame_mi = cpi->common.mi;
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
#else
(void)flags;
if (cpi->common.frame_to_show)
{
@@ -5698,7 +5946,7 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns
{
if (map)
{
vpx_memcpy(cpi->active_map, map, rows * cols);
memcpy(cpi->active_map, map, rows * cols);
cpi->active_map_enabled = 1;
}
else
@@ -5745,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
Total += vpx_mse16x16(src + j, source->y_stride,
dst + j, dest->y_stride, &sse);
}
src += 16 * source->y_stride;
+19
View File
@@ -513,10 +513,20 @@ typedef struct VP8_COMP
signed char *cyclic_refresh_map;
// Count on how many (consecutive) times a macroblock uses ZER0MV_LAST.
unsigned char *consec_zero_last;
// Counter that is reset when a block is checked for a mode-bias against
// ZEROMV_LASTREF.
unsigned char *consec_zero_last_mvbias;
// Frame counter for the temporal pattern. Counter is rest when the temporal
// layers are changed dynamically (run-time change).
unsigned int temporal_pattern_counter;
// Temporal layer id.
int temporal_layer_id;
// Measure of average squared difference between source and denoised signal.
int mse_source_denoised;
int force_maxqp;
#if CONFIG_MULTITHREAD
/* multithread data */
@@ -657,6 +667,9 @@ typedef struct VP8_COMP
int droppable;
int initial_width;
int initial_height;
#if CONFIG_TEMPORAL_DENOISING
VP8_DENOISER denoiser;
#endif
@@ -687,6 +700,7 @@ typedef struct VP8_COMP
#endif
/* The frame number of each reference frames */
unsigned int current_ref_frames[MAX_REF_FRAMES];
// Closest reference frame to current frame.
MV_REFERENCE_FRAME closest_reference_frame;
struct rd_costs_struct
@@ -702,6 +716,11 @@ typedef struct VP8_COMP
} rd_costs;
} VP8_COMP;
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
unsigned char *dest_end, unsigned long *size);
+314 -75
View File
@@ -11,6 +11,7 @@
#include <limits.h>
#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
@@ -29,8 +30,6 @@
#include "denoising.h"
#endif
extern int VP8_UVSSE(MACROBLOCK *x);
#ifdef SPEEDSTATS
extern unsigned int cnt_pm;
#endif
@@ -38,7 +37,133 @@ extern unsigned int cnt_pm;
extern const int vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
// Fixed point implementation of a skin color classifier. Skin color
// is model by a Gaussian distribution in the CbCr color space.
// See ../../test/skin_color_detector_test.cc where the reference
// skin color classifier is defined.
// Fixed-point skin color model parameters.
static const int skin_mean[2] = {7463, 9614}; // q6
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
static const int skin_threshold = 1570636; // q18
// Evaluates the Mahalanobis distance measure for the input CbCr values.
static int evaluate_skin_color_difference(int cb, int cr)
{
const int cb_q6 = cb << 6;
const int cr_q6 = cr << 6;
const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]);
const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]);
const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]);
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
const int skin_diff = skin_inv_cov[0] * cb_diff_q2 +
skin_inv_cov[1] * cbcr_diff_q2 +
skin_inv_cov[2] * cbcr_diff_q2 +
skin_inv_cov[3] * cr_diff_q2;
return skin_diff;
}
static int macroblock_corner_grad(unsigned char* signal, int stride,
int offsetx, int offsety, int sgnx, int sgny)
{
int y1 = signal[offsetx * stride + offsety];
int y2 = signal[offsetx * stride + offsety + sgny];
int y3 = signal[(offsetx + sgnx) * stride + offsety];
int y4 = signal[(offsetx + sgnx) * stride + offsety + sgny];
return MAX(MAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4));
}
static int check_dot_artifact_candidate(VP8_COMP *cpi,
MACROBLOCK *x,
unsigned char *target_last,
int stride,
unsigned char* last_ref,
int mb_row,
int mb_col,
int channel)
{
int threshold1 = 6;
int threshold2 = 3;
unsigned int max_num = (cpi->common.MBs) / 10;
int grad_last = 0;
int grad_source = 0;
int index = mb_row * cpi->common.mb_cols + mb_col;
// Threshold for #consecutive (base layer) frames using zero_last mode.
int num_frames = 30;
int shift = 15;
if (channel > 0) {
shift = 7;
}
if (cpi->oxcf.number_of_layers > 1)
{
num_frames = 20;
}
x->zero_last_dot_suppress = 0;
// Blocks on base layer frames that have been using ZEROMV_LAST repeatedly
// (i.e, at least |x| consecutive frames are candidates for increasing the
// rd adjustment for zero_last mode.
// Only allow this for at most |max_num| blocks per frame.
// Don't allow this for screen content input.
if (cpi->current_layer == 0 &&
cpi->consec_zero_last_mvbias[index] > num_frames &&
x->mbs_zero_last_dot_suppress < max_num &&
!cpi->oxcf.screen_content_mode)
{
// If this block is checked here, label it so we don't check it again until
// ~|x| framaes later.
x->zero_last_dot_suppress = 1;
// Dot artifact is noticeable as strong gradient at corners of macroblock,
// for flat areas. As a simple detector for now, we look for a high
// corner gradient on last ref, and a smaller gradient on source.
// Check 4 corners, return if any satisfy condition.
// Top-left:
grad_last = macroblock_corner_grad(last_ref, stride, 0, 0, 1, 1);
grad_source = macroblock_corner_grad(target_last, stride, 0, 0, 1, 1);
if (grad_last >= threshold1 && grad_source <= threshold2)
{
x->mbs_zero_last_dot_suppress++;
return 1;
}
// Top-right:
grad_last = macroblock_corner_grad(last_ref, stride, 0, shift, 1, -1);
grad_source = macroblock_corner_grad(target_last, stride, 0, shift, 1, -1);
if (grad_last >= threshold1 && grad_source <= threshold2)
{
x->mbs_zero_last_dot_suppress++;
return 1;
}
// Bottom-left:
grad_last = macroblock_corner_grad(last_ref, stride, shift, 0, -1, 1);
grad_source = macroblock_corner_grad(target_last, stride, shift, 0, -1, 1);
if (grad_last >= threshold1 && grad_source <= threshold2)
{
x->mbs_zero_last_dot_suppress++;
return 1;
}
// Bottom-right:
grad_last = macroblock_corner_grad(last_ref, stride, shift, shift, -1, -1);
grad_source = macroblock_corner_grad(target_last, stride, shift, shift, -1, -1);
if (grad_last >= threshold1 && grad_source <= threshold2)
{
x->mbs_zero_last_dot_suppress++;
return 1;
}
return 0;
}
return 0;
}
// Checks if the input yCbCr values corresponds to skin color.
static int is_skin_color(int y, int cb, int cr)
{
if (y < 40 || y > 220)
{
return 0;
}
return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
}
int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
@@ -52,6 +177,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
(void) ref_mv;
(void) error_per_bit;
(void) vfp;
(void) mb;
(void) mvcost;
(void) distortion;
(void) sse;
@@ -90,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
}
unsigned int vp8_get4x4sse_cs_c
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride
)
{
int distortion = 0;
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int diff = src_ptr[c] - ref_ptr[c];
distortion += diff * diff;
}
src_ptr += source_stride;
ref_ptr += recon_stride;
}
return distortion;
}
static int get_prediction_error(BLOCK *be, BLOCKD *b)
{
unsigned char *sptr;
@@ -124,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
sptr = (*(be->base_src) + be->src);
dptr = b->predictor;
return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
}
@@ -514,10 +613,16 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2,
#endif
// Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame.
if (this_mode == ZEROMV &&
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
(denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME))
// TODO: We should also add condition on distance of closest to current.
if(!cpi->oxcf.screen_content_mode &&
this_mode == ZEROMV &&
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
(denoise_aggressive || (cpi->closest_reference_frame == LAST_FRAME)))
{
// No adjustment if block is considered to be skin area.
if(x->is_skin)
rd_adj = 100;
this_rd = ((int64_t)this_rd) * rd_adj / 100;
}
@@ -598,6 +703,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#endif
int sf_improved_mv_pred = cpi->sf.improved_mv_pred;
#if CONFIG_MULTI_RES_ENCODING
int dissim = INT_MAX;
int parent_ref_frame = 0;
int_mv parent_ref_mv;
MB_PREDICTION_MODE parent_mode = 0;
int parent_ref_valid = 0;
#endif
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@@ -608,14 +722,56 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
unsigned char *plane[4][3];
int ref_frame_map[4];
int sign_bias = 0;
int dot_artifact_candidate = 0;
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
// If the current frame is using LAST as a reference, check for
// biasing the mode selection for dot artifacts.
if (cpi->ref_frame_flags & VP8_LAST_FRAME) {
unsigned char* target_y = x->src.y_buffer;
unsigned char* target_u = x->block[16].src + *x->block[16].base_src;
unsigned char* target_v = x->block[20].src + *x->block[20].base_src;
int stride = x->src.y_stride;
int stride_uv = x->block[16].src_stride;
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity) {
const int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 1 : 0;
target_y =
cpi->denoiser.yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset;
stride = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_stride;
if (uv_denoise) {
target_u =
cpi->denoiser.yv12_running_avg[LAST_FRAME].u_buffer +
recon_uvoffset;
target_v =
cpi->denoiser.yv12_running_avg[LAST_FRAME].v_buffer +
recon_uvoffset;
stride_uv = cpi->denoiser.yv12_running_avg[LAST_FRAME].uv_stride;
}
}
#endif
dot_artifact_candidate =
check_dot_artifact_candidate(cpi, x, target_y, stride,
plane[LAST_FRAME][0], mb_row, mb_col, 0);
// If not found in Y channel, check UV channel.
if (!dot_artifact_candidate) {
dot_artifact_candidate =
check_dot_artifact_candidate(cpi, x, target_u, stride_uv,
plane[LAST_FRAME][1], mb_row, mb_col, 1);
if (!dot_artifact_candidate) {
dot_artifact_candidate =
check_dot_artifact_candidate(cpi, x, target_v, stride_uv,
plane[LAST_FRAME][2], mb_row, mb_col, 2);
}
}
}
#if CONFIG_MULTI_RES_ENCODING
int dissim = INT_MAX;
int parent_ref_frame = 0;
int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail;
int_mv parent_ref_mv;
MB_PREDICTION_MODE parent_mode = 0;
// |parent_ref_valid| will be set here if potentially we can do mv resue for
// this higher resol (|cpi->oxcf.mr_encoder_id| > 0) frame.
// |parent_ref_valid| may be reset depending on |parent_ref_frame| for
// the current macroblock below.
parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail;
if (parent_ref_valid)
{
int parent_ref_flag;
@@ -633,24 +789,51 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
* In this event, take the conservative approach of disabling the
* lower res info for this MB.
*/
parent_ref_flag = 0;
// Note availability for mv reuse is only based on last and golden.
if (parent_ref_frame == LAST_FRAME)
parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME);
else if (parent_ref_frame == GOLDEN_FRAME)
parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME);
else if (parent_ref_frame == ALTREF_FRAME)
parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME);
//assert(!parent_ref_frame || parent_ref_flag);
// If |parent_ref_frame| did not match either last or golden then
// shut off mv reuse.
if (parent_ref_frame && !parent_ref_flag)
parent_ref_valid = 0;
// Don't do mv reuse since we want to allow for another mode besides
// ZEROMV_LAST to remove dot artifact.
if (dot_artifact_candidate)
parent_ref_valid = 0;
}
#endif
// Check if current macroblock is in skin area.
{
const int y = x->src.y_buffer[7 * x->src.y_stride + 7];
const int cb = x->src.u_buffer[3 * x->src.uv_stride + 3];
const int cr = x->src.v_buffer[3 * x->src.uv_stride + 3];
x->is_skin = 0;
if (!cpi->oxcf.screen_content_mode)
x->is_skin = is_skin_color(y, cb, cr);
}
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity) {
// Under aggressive denoising mode, should we use skin map to reduce denoiser
// and ZEROMV bias? Will need to revisit the accuracy of this detection for
// very noisy input. For now keep this as is (i.e., don't turn it off).
// if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive)
// x->is_skin = 0;
}
#endif
mode_mv = mode_mv_sb[sign_bias];
best_ref_mv.as_int = 0;
vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
memset(&best_mbmode, 0, sizeof(best_mbmode));
/* Setup search priorities */
#if CONFIG_MULTI_RES_ENCODING
@@ -681,8 +864,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
}
get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
/* Count of the number of MBs tested so far this frame */
x->mbs_tested_so_far++;
@@ -692,9 +873,13 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
/* If the frame has big static background and current MB is in low
* motion area, its mode decision is biased to ZEROMV mode.
*/
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
* motion area, its mode decision is biased to ZEROMV mode.
* No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
* At such speed settings, ZEROMV is already heavily favored.
*/
if (cpi->Speed < 12) {
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
}
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity) {
@@ -703,6 +888,13 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
#endif
if (dot_artifact_candidate)
{
// Bias against ZEROMV_LAST mode.
rd_adjustment = 150;
}
/* if we encode a new mv this is important
* find the best new motion vector
*/
@@ -819,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
else
{
rate2 += rate;
distortion2 = vp8_variance16x16(
distortion2 = vpx_variance16x16(
*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -848,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
xd->dst.y_stride,
xd->predictor,
16);
distortion2 = vp8_variance16x16
distortion2 = vpx_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -888,14 +1080,17 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
step_param = cpi->sf.first_step + speed_adjust;
#if CONFIG_MULTI_RES_ENCODING
/* If lower-res drops this frame, then higher-res encoder does
motion search without any previous knowledge. Also, since
last frame motion info is not stored, then we can not
/* If lower-res frame is not available for mv reuse (because of
frame dropping or different temporal layer pattern), then higher
resol encoder does motion search without any previous knowledge.
Also, since last frame motion info is not stored, then we can not
use improved_mv_pred. */
if (cpi->oxcf.mr_encoder_id && !parent_ref_valid)
if (cpi->oxcf.mr_encoder_id)
sf_improved_mv_pred = 0;
if (parent_ref_valid && parent_ref_frame)
// Only use parent MV as predictor if this candidate reference frame
// (|this_ref_frame|) is equal to |parent_ref_frame|.
if (parent_ref_valid && (parent_ref_frame == this_ref_frame))
{
/* Use parent MV as predictor. Adjust search range
* accordingly.
@@ -939,7 +1134,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
#if CONFIG_MULTI_RES_ENCODING
if (parent_ref_valid && parent_ref_frame && dissim <= 2 &&
if (parent_ref_valid && (parent_ref_frame == this_ref_frame) &&
dissim <= 2 &&
MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4)
{
@@ -976,10 +1172,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
* change the behavior in lowest-resolution encoder.
* Will improve it later.
*/
/* Set step_param to 0 to ensure large-range motion search
when encoder drops this frame at lower-resolution.
*/
if (!parent_ref_valid)
/* Set step_param to 0 to ensure large-range motion search
* when mv reuse if not valid (i.e. |parent_ref_valid| = 0),
* or if this candidate reference frame (|this_ref_frame|) is
* not equal to |parent_ref_frame|.
*/
if (!parent_ref_valid || (parent_ref_frame != this_ref_frame))
step_param = 0;
#endif
bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv,
@@ -1081,18 +1279,24 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
/* Store for later use by denoiser. */
if (this_mode == ZEROMV && sse < zero_mv_sse )
// Dont' denoise with GOLDEN OR ALTREF is they are old reference
// frames (greater than MAX_GF_ARF_DENOISE_RANGE frames in past).
int skip_old_reference = ((this_ref_frame != LAST_FRAME) &&
(cpi->common.current_video_frame -
cpi->current_ref_frames[this_ref_frame] >
MAX_GF_ARF_DENOISE_RANGE)) ? 1 : 0;
if (this_mode == ZEROMV && sse < zero_mv_sse &&
!skip_old_reference)
{
zero_mv_sse = sse;
x->best_zeromv_reference_frame =
x->e_mbd.mode_info_context->mbmi.ref_frame;
}
/* Store the best NEWMV in x for later use in the denoiser. */
// Store the best NEWMV in x for later use in the denoiser.
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
sse < best_sse)
sse < best_sse && !skip_old_reference)
{
best_sse = sse;
x->best_sse_inter_mode = NEWMV;
@@ -1114,8 +1318,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returndistortion = distortion2;
best_rd_sse = sse;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
sizeof(MB_MODE_INFO));
memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
sizeof(MB_MODE_INFO));
/* Testing this mode gave rise to an improvement in best error
* score. Lower threshold a bit for next time
@@ -1178,6 +1382,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if (cpi->oxcf.noise_sensitivity)
{
int block_index = mb_row * cpi->common.mb_cols + mb_col;
int reevaluate = 0;
int is_noisy = 0;
if (x->best_sse_inter_mode == DC_PRED)
{
/* No best MV found. */
@@ -1187,18 +1393,52 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->best_reference_frame = best_mbmode.ref_frame;
best_sse = best_rd_sse;
}
// For non-skin blocks that have selected ZEROMV for this current frame,
// and have been selecting ZEROMV_LAST (on the base layer frame) at
// least |x~20| consecutive past frames in a row, label the block for
// possible increase in denoising strength. We also condition this
// labeling on there being significant denoising in the scene
if (cpi->oxcf.noise_sensitivity == 4) {
if (cpi->denoiser.nmse_source_diff >
70 * cpi->denoiser.threshold_aggressive_mode / 100)
is_noisy = 1;
} else {
if (cpi->mse_source_denoised > 1000)
is_noisy = 1;
}
x->increase_denoising = 0;
if (!x->is_skin &&
x->best_sse_inter_mode == ZEROMV &&
(x->best_reference_frame == LAST_FRAME ||
x->best_reference_frame == cpi->closest_reference_frame) &&
cpi->consec_zero_last[block_index] >= 20 &&
is_noisy) {
x->increase_denoising = 1;
}
x->denoise_zeromv = 0;
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
recon_yoffset, recon_uvoffset,
&cpi->common.lf_info, mb_row, mb_col,
block_index);
/* Reevaluate ZEROMV after denoising. */
if (best_mbmode.ref_frame == INTRA_FRAME &&
// Reevaluate ZEROMV after denoising: for large noise content
// (i.e., cpi->mse_source_denoised is above threshold), do this for all
// blocks that did not pick ZEROMV as best mode but are using ZEROMV
// for denoising. Otherwise, always re-evaluate for blocks that picked
// INTRA mode as best mode.
// Avoid blocks that have been biased against ZERO_LAST
// (i.e., dot artifact candidate blocks).
reevaluate = (best_mbmode.ref_frame == INTRA_FRAME) ||
(best_mbmode.mode != ZEROMV &&
x->denoise_zeromv &&
cpi->mse_source_denoised > 2000);
if (!dot_artifact_candidate &&
reevaluate &&
x->best_zeromv_reference_frame != INTRA_FRAME)
{
int this_rd = 0;
int this_ref_frame = x->best_zeromv_reference_frame;
rd_adjustment = 100;
rate2 = x->ref_frame_cost[this_ref_frame] +
vp8_cost_mv_ref(ZEROMV, mdcounts);
distortion2 = 0;
@@ -1217,8 +1457,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if (this_rd < best_rd)
{
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
sizeof(MB_MODE_INFO));
memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
sizeof(MB_MODE_INFO));
}
}
@@ -1242,8 +1482,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* set to the best mb mode, this copy can be skip if x->skip since it
* already has the right content */
if (!x->skip)
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
sizeof(MB_MODE_INFO));
memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
sizeof(MB_MODE_INFO));
if (best_mbmode.mode <= B_PRED)
{
@@ -1258,7 +1498,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
update_mvcount(x, &best_ref_mv);
}
void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
{
int error4x4, error16x16 = INT_MAX;
@@ -1282,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
xd->dst.y_stride,
xd->predictor,
16);
distortion = vp8_variance16x16
distortion = vpx_variance16x16
(*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
rate = x->mbmode_cost[xd->frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+6 -6
View File
@@ -9,6 +9,7 @@
*/
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
@@ -49,7 +50,7 @@ static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
src_y = src_ybc->y_buffer + yoffset;
dst_y = dst_ybc->y_buffer + yoffset;
vpx_memcpy(dst_y, src_y, ystride * linestocopy);
memcpy(dst_y, src_y, ystride * linestocopy);
}
static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
@@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
Total += vp8_mse16x16(src + j, source->y_stride,
Total += vpx_mse16x16(src + j, source->y_stride,
dst + j, dest->y_stride,
&sse);
}
@@ -142,7 +143,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
int filt_val;
int best_filt_val = cm->filter_level;
int best_filt_val;
YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;
/* Replace unfiltered frame buffer with a new one */
@@ -274,8 +275,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
int filter_step;
int filt_high = 0;
/* Start search at previous frame filter level */
int filt_mid = cm->filter_level;
int filt_mid;
int filt_low = 0;
int filt_best;
int filt_direction = 0;
@@ -287,7 +287,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;
vpx_memset(ss_err, 0, sizeof(ss_err));
memset(ss_err, 0, sizeof(ss_err));
/* Replace unfiltered frame buffer with a new one */
cm->frame_to_show = &cpi->pick_lf_lvl_frame;
+21 -24
View File
@@ -65,8 +65,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
short *dequant_ptr = d->dequant;
short zbin_oq_value = b->zbin_extra;
vpx_memset(qcoeff_ptr, 0, 32);
vpx_memset(dqcoeff_ptr, 0, 32);
memset(qcoeff_ptr, 0, 32);
memset(dqcoeff_ptr, 0, 32);
eob = -1;
@@ -101,7 +101,7 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
*d->eob = (char)(eob + 1);
}
void vp8_quantize_mby_c(MACROBLOCK *x)
void vp8_quantize_mby(MACROBLOCK *x)
{
int i;
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -114,7 +114,7 @@ void vp8_quantize_mby_c(MACROBLOCK *x)
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
void vp8_quantize_mb_c(MACROBLOCK *x)
void vp8_quantize_mb(MACROBLOCK *x)
{
int i;
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -125,7 +125,7 @@ void vp8_quantize_mb_c(MACROBLOCK *x)
}
void vp8_quantize_mbuv_c(MACROBLOCK *x)
void vp8_quantize_mbuv(MACROBLOCK *x)
{
int i;
@@ -133,23 +133,6 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x)
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
}
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
* these two C functions if corresponding optimized routine is not available.
* NEON optimized version implements currently the fast quantization for pair
* of blocks. */
void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
vp8_regular_quantize_b(b1, d1);
vp8_regular_quantize_b(b2, d2);
}
void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
vp8_fast_quantize_b_c(b1, d1);
vp8_fast_quantize_b_c(b2, d2);
}
static const int qrounding_factors[129] =
{
48, 48, 48, 48, 48, 48, 48, 48,
@@ -552,6 +535,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
int update = 0;
int new_delta_q;
int new_uv_delta_q;
cm->base_qindex = Q;
/* if any of the delta_q values are changing update flag has to be set */
@@ -559,8 +543,6 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
cm->y1dc_delta_q = 0;
cm->y2ac_delta_q = 0;
cm->uvdc_delta_q = 0;
cm->uvac_delta_q = 0;
if (Q < 4)
{
@@ -572,6 +554,21 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
update |= cm->y2dc_delta_q != new_delta_q;
cm->y2dc_delta_q = new_delta_q;
new_uv_delta_q = 0;
// For screen content, lower the q value for UV channel. For now, select
// conservative delta; same delta for dc and ac, and decrease it with lower
// Q, and set to 0 below some threshold. May want to condition this in
// future on the variance/energy in UV channel.
if (cpi->oxcf.screen_content_mode && Q > 40) {
new_uv_delta_q = -(int)(0.15 * Q);
// Check range: magnitude of delta is 4 bits.
if (new_uv_delta_q < -15) {
new_uv_delta_q = -15;
}
}
update |= cm->uvdc_delta_q != new_uv_delta_q;
cm->uvdc_delta_q = new_uv_delta_q;
cm->uvac_delta_q = new_uv_delta_q;
/* Set Segment specific quatizers */
mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
+3
View File
@@ -18,6 +18,9 @@ extern "C" {
struct VP8_COMP;
struct macroblock;
extern void vp8_quantize_mb(struct macroblock *x);
extern void vp8_quantize_mby(struct macroblock *x);
extern void vp8_quantize_mbuv(struct macroblock *x);
extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q);
extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi);
extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
+59 -5
View File
@@ -296,7 +296,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
vp8_default_coef_probs(& cpi->common);
vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
{
int flag[2] = {1, 1};
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
@@ -305,9 +305,9 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
/* Make sure we initialize separate contexts for altref,gold, and normal.
* TODO shouldn't need 3 different copies of structure to do this!
*/
vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
@@ -708,7 +708,13 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
Adjustment = (cpi->this_frame_target - min_frame_target);
if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
{
Adjustment = (cpi->current_gf_interval - 1) * Adjustment;
// Limit adjustment to 10% of current target.
if (Adjustment > (10 * cpi->this_frame_target) / 100)
Adjustment = (10 * cpi->this_frame_target) / 100;
cpi->this_frame_target += Adjustment;
}
else
cpi->this_frame_target -= Adjustment;
}
@@ -1209,6 +1215,11 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
int Q = cpi->active_worst_quality;
if (cpi->force_maxqp == 1) {
cpi->active_worst_quality = cpi->worst_quality;
return cpi->worst_quality;
}
/* Reset Zbin OQ value */
cpi->mb.zbin_over_quant = 0;
@@ -1553,3 +1564,46 @@ int vp8_pick_frame_size(VP8_COMP *cpi)
}
return 1;
}
// If this just encoded frame (mcomp/transform/quant, but before loopfilter and
// pack_bitstream) has large overshoot, and was not being encoded close to the
// max QP, then drop this frame and force next frame to be encoded at max QP.
// Condition this on 1 pass CBR with screen content mode and frame dropper off.
// TODO(marpan): Should do this exit condition during the encode_frame
// (i.e., halfway during the encoding of the frame) to save cycles.
int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) {
if (cpi->pass == 0 &&
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
cpi->drop_frames_allowed == 0 &&
cpi->common.frame_type != KEY_FRAME) {
// Note: the "projected_frame_size" from encode_frame() only gives estimate
// of mode/motion vector rate (in non-rd mode): so below we only require
// that projected_frame_size is somewhat greater than per-frame-bandwidth,
// but add additional condition with high threshold on prediction residual.
// QP threshold: only allow dropping if we are not close to qp_max.
int thresh_qp = 3 * cpi->worst_quality >> 2;
// Rate threshold, in bytes.
int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3);
// Threshold for the average (over all macroblocks) of the pixel-sum
// residual error over 16x16 block. Should add QP dependence on threshold?
int thresh_pred_err_mb = (256 << 4);
int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs);
if (Q < thresh_qp &&
cpi->projected_frame_size > thresh_rate &&
pred_err_mb > thresh_pred_err_mb) {
// Drop this frame: advance frame counters, and set force_maxqp flag.
cpi->common.current_video_frame++;
cpi->frames_since_key++;
// Flag to indicate we will force next frame to be encoded at max QP.
cpi->force_maxqp = 1;
return 1;
} else {
cpi->force_maxqp = 0;
return 0;
}
cpi->force_maxqp = 0;
return 0;
}
cpi->force_maxqp = 0;
return 0;
}
+2
View File
@@ -30,6 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_
/* return of 0 means drop frame */
extern int vp8_pick_frame_size(VP8_COMP *cpi);
extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q);
#ifdef __cplusplus
} // extern "C"
#endif
+45 -42
View File
@@ -15,6 +15,7 @@
#include <assert.h>
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "tokenize.h"
#include "treewriter.h"
#include "onyx_int.h"
@@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x)
}
else
{
vp8_variance8x8(uptr, pre_stride,
vpx_variance8x8(uptr, pre_stride,
upred_ptr, uv_stride, &sse2);
vp8_variance8x8(vptr, pre_stride,
vpx_variance8x8(vptr, pre_stride,
vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
}
@@ -555,8 +556,8 @@ static int vp8_rdcost_mby(MACROBLOCK *mb)
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -650,8 +651,8 @@ static int rd_pick_intra4x4block(
* a temp buffer that meets the stride requirements, but we are only
* interested in the left 4x4 block
* */
DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4);
DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
DECLARE_ALIGNED(16, unsigned char, best_predictor[16*4]);
DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
int dst_stride = x->e_mbd.dst.y_stride;
unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
@@ -691,7 +692,7 @@ static int rd_pick_intra4x4block(
*a = tempa;
*l = templ;
copy_predictor(best_predictor, b->predictor);
vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
memcpy(best_dqcoeff, b->dqcoeff, 32);
}
}
b->bmi.as_mode = *best_mode;
@@ -715,8 +716,8 @@ static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
ENTROPY_CONTEXT *tl;
const int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -820,8 +821,8 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -837,6 +838,9 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel)
{
(void)cpi;
(void)fullpixel;
vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
vp8_subtract_mbuv(x->src_diff,
x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
@@ -854,6 +858,9 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel)
{
(void)cpi;
(void)fullpixel;
vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
vp8_subtract_mbuv(x->src_diff,
x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
@@ -1122,8 +1129,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
ENTROPY_CONTEXT *ta_b;
ENTROPY_CONTEXT *tl_b;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
@@ -1166,8 +1173,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
ENTROPY_CONTEXT *ta_s;
ENTROPY_CONTEXT *tl_s;
vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
ta_s = (ENTROPY_CONTEXT *)&t_above_s;
tl_s = (ENTROPY_CONTEXT *)&t_left_s;
@@ -1323,14 +1330,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
mode_selected = this_mode;
best_label_rd = this_rd;
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
}
} /*for each 4x4 mode*/
vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
bsi->ref_mv, x->mvcost);
@@ -1386,7 +1393,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
int i;
BEST_SEG_INFO bsi;
vpx_memset(&bsi, 0, sizeof(bsi));
memset(&bsi, 0, sizeof(bsi));
bsi.segment_rd = best_rd;
bsi.ref_mv = best_ref_mv;
@@ -1655,7 +1662,6 @@ void vp8_mv_pred
mv.as_mv.row = mvx[vcnt/2];
mv.as_mv.col = mvy[vcnt/2];
find = 1;
/* sr is set to 0 to allow calling function to decide the search
* range.
*/
@@ -1685,16 +1691,16 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
}else if(xd->mb_to_top_edge==0)
{ /* only has left MB for sad calculation. */
near_sad[0] = near_sad[2] = INT_MAX;
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
}else if(xd->mb_to_left_edge ==0)
{ /* only has left MB for sad calculation. */
near_sad[1] = near_sad[2] = INT_MAX;
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
}else
{
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride);
}
if(cpi->common.last_frame_type != KEY_FRAME)
@@ -1709,14 +1715,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
if(near_sad[4] != INT_MAX)
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride);
if(near_sad[5] != INT_MAX)
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);
if(near_sad[6] != INT_MAX)
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
if(near_sad[7] != INT_MAX)
near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride);
}
if(cpi->common.last_frame_type != KEY_FRAME)
@@ -1778,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
if(threshold < x->encode_breakout)
threshold = x->encode_breakout;
var = vp8_variance16x16
var = vpx_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
@@ -1920,8 +1926,8 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd,
(rd->distortion2-rd->distortion_uv));
best_mode->rd = this_rd;
vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
if ((this_mode == B_PRED) || (this_mode == SPLITMV))
{
@@ -1983,9 +1989,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
best_mode.rd = INT_MAX;
best_mode.yrd = INT_MAX;
best_mode.intra_rd = INT_MAX;
vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
/* Setup search priorities */
get_reference_search_order(cpi, ref_frame_map);
@@ -2287,7 +2293,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
/* Further step/diamond searches as necessary */
n = 0;
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
n = num00;
@@ -2554,8 +2559,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
intra_rd_penalty, cpi, x);
if (this_rd < best_mode.rd || x->skip)
{
/* Note index of best mode so far */
best_mode_index = mode_index;
*returnrate = rd.rate2;
*returndistortion = rd.distortion2;
update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
@@ -2580,7 +2583,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* macroblock modes */
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
if (best_mode.mbmode.mode == B_PRED)
{
@@ -2593,7 +2596,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
for (i = 0; i < 16; i++)
xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
x->e_mbd.mode_info_context->mbmi.mv.as_int =
x->partition_info->bmi[15].mv.as_int;
+3
View File
@@ -136,6 +136,9 @@ extern void vp8_mv_pred
int near_sadidx[]
);
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
int VP8_UVSSE(MACROBLOCK *x);
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
#ifdef __cplusplus
} // extern "C"
+1 -1
View File
@@ -23,7 +23,7 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
{
/* Reset Gf useage monitors */
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
}
else
+8 -6
View File
@@ -163,6 +163,8 @@ static int vp8_temporal_filter_find_matching_mb_c
int pre = d->offset;
int pre_stride = x->e_mbd.pre.y_stride;
(void)error_thresh;
best_ref_mv1.as_int = 0;
best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3;
best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3;
@@ -236,12 +238,12 @@ static void vp8_temporal_filter_iterate_c
int mb_rows = cpi->common.mb_rows;
int mb_y_offset = 0;
int mb_uv_offset = 0;
DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
DECLARE_ALIGNED(16, unsigned int, accumulator[16*16 + 8*8 + 8*8]);
DECLARE_ALIGNED(16, unsigned short, count[16*16 + 8*8 + 8*8]);
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
unsigned char *dst1, *dst2;
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
DECLARE_ALIGNED(16, unsigned char, predictor[16*16 + 8*8 + 8*8]);
/* Save input state */
unsigned char *y_buffer = mbd->pre.y_buffer;
@@ -272,8 +274,8 @@ static void vp8_temporal_filter_iterate_c
int i, j, k;
int stride;
vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
vpx_memset(count, 0, 384*sizeof(unsigned short));
memset(accumulator, 0, 384*sizeof(unsigned int));
memset(count, 0, 384*sizeof(unsigned short));
#if ALT_REF_MC_ENABLED
cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
@@ -500,7 +502,7 @@ void vp8_temporal_filter_prepare_c
start_frame = distance + frames_to_blur_forward;
/* Setup frame pointers, NULL indicates frame not included in filter */
vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++)
{
int which_buffer = start_frame - frame;
+5 -5
View File
@@ -421,7 +421,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
void init_context_counters(void)
{
vpx_memset(context_counters, 0, sizeof(context_counters));
memset(context_counters, 0, sizeof(context_counters));
}
void print_context_counters()
@@ -596,13 +596,13 @@ void vp8_fix_contexts(MACROBLOCKD *x)
/* Clear entropy contexts for Y2 blocks */
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
else
{
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
}
}
@@ -1,93 +0,0 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/asm_offsets.h"
#include "vpx_config.h"
#include "block.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "tokenize.h"
BEGIN
/* regular quantize */
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
DEFINE(vp8_block_round, offsetof(BLOCK, round));
DEFINE(vp8_block_quant, offsetof(BLOCK, quant));
DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
DEFINE(vp8_block_zbin_extra, offsetof(BLOCK, zbin_extra));
DEFINE(vp8_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
DEFINE(vp8_block_quant_shift, offsetof(BLOCK, quant_shift));
DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
/* subtract */
DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
DEFINE(vp8_block_src, offsetof(BLOCK, src));
DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
/* pack tokens */
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
DEFINE(vp8_token_value, offsetof(vp8_token, value));
DEFINE(vp8_token_len, offsetof(vp8_token, Len));
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
DEFINE(vp8_writer_sz , sizeof(vp8_writer));
DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
END
/* add asserts for any offset that is not supported by assembly code
* add asserts for any size that is not supported by assembly code
* These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
* change they will have to be adjusted.
*/
#if HAVE_EDSP
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
#endif
@@ -121,12 +121,12 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
if (abs_sum_diff > sum_diff_thresh) {
// Before returning to copy the block (i.e., apply no denoising),
// checK if we can still apply some (weaker) temporal filtering to
// check if we can still apply some (weaker) temporal filtering to
// this block, that would otherwise not be denoised at all. Simplest
// is to apply an additional adjustment to running_avg_y to bring it
// closer to sig. The adjustment is capped by a maximum delta, and
// chosen such that in most cases the resulting sum_diff will be
// within the accceptable range given by sum_diff_thresh.
// within the acceptable range given by sum_diff_thresh.
// The delta is set by the excess of absolute pixel diff over the
// threshold.
@@ -302,12 +302,12 @@ int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
if (abs_sum_diff > sum_diff_thresh) {
// Before returning to copy the block (i.e., apply no denoising),
// checK if we can still apply some (weaker) temporal filtering to
// check if we can still apply some (weaker) temporal filtering to
// this block, that would otherwise not be denoised at all. Simplest
// is to apply an additional adjustment to running_avg_y to bring it
// closer to sig. The adjustment is capped by a maximum delta, and
// chosen such that in most cases the resulting sum_diff will be
// within the accceptable range given by sum_diff_thresh.
// within the acceptable range given by sum_diff_thresh.
// The delta is set by the excess of absolute pixel diff over the
// threshold.

Some files were not shown because too many files have changed in this diff Show More