Skip to content
Snippets Groups Projects
Commit d8de3cd6 authored by Leo Ma's avatar Leo Ma
Browse files

Upgrade libx264


Signed-off-by: default avatarLeo Ma <begeekmyfriend@gmail.com>
parent 0984dd59
No related branches found
No related tags found
No related merge requests found
Showing
with 277 additions and 216 deletions
Loading
Loading
@@ -43,6 +43,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_YUYV] = { "yuyv", 1, { 2 }, { 1 }, 2, 1 },
[X264_CSP_UYVY] = { "uyvy", 1, { 2 }, { 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
[X264_CSP_RGB] = { "rgb", 1, { 3 }, { 1 }, 1, 1 },
Loading
Loading
Loading
Loading
@@ -98,6 +98,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
uint64_t size = ftell( h->fh );
fseek( h->fh, 0, SEEK_SET );
info->num_frames = size / h->frame_size;
FAIL_IF_ERROR( !info->num_frames, "empty input file\n" );
 
/* Attempt to use memory-mapped input frames if possible */
if( !(h->bit_depth & 7) )
Loading
Loading
Loading
Loading
@@ -223,6 +223,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
uint64_t i_size = ftell( h->fh );
fseek( h->fh, init_pos, SEEK_SET );
info->num_frames = (i_size - h->seq_header_len) / h->frame_size;
FAIL_IF_ERROR( !info->num_frames, "empty input file\n" );
 
/* Attempt to use memory-mapped input frames if possible */
if( !(h->bit_depth & 7) )
Loading
Loading
No preview for this file type
No preview for this file type
Loading
Loading
@@ -153,7 +153,11 @@ cglobal checkasm_call, 2,15,16,max_args*8+8
mov r9, rax
mov r10, rdx
lea r0, [error_message]
%if FORMAT_ELF
call puts wrt ..plt
%else
call puts
%endif
mov r1, [rsp+max_args*8]
mov dword [r1], 0
mov rdx, r10
Loading
Loading
@@ -221,3 +225,14 @@ cglobal stack_pagealign, 2,2
leave
RET
 
; Trigger a warmup of vector units
%macro WARMUP 0
cglobal checkasm_warmup, 0,0
xorps m0, m0
RET
%endmacro
INIT_YMM avx
WARMUP
INIT_ZMM avx512
WARMUP
Loading
Loading
@@ -25,9 +25,7 @@
 
#include "../common/aarch64/asm.S"
 
.section .rodata
.align 4
register_init:
const register_init, align=4
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
Loading
Loading
@@ -46,10 +44,12 @@ register_init:
.quad 0xd229e1f5b281303f
.quad 0x71aeaff20b095fd9
.quad 0xab63e2e11fa38ed9
endconst
 
 
error_message:
const error_message
.asciz "failed to preserve register"
endconst
 
.text
 
Loading
Loading
@@ -149,7 +149,7 @@ function x264_checkasm_call, export=1
mov w9, #0
str w9, [x2]
movrel x0, error_message
bl puts
bl X(puts)
0:
ldp x0, x1, [sp], #16
ldp d14, d15, [sp], #16
Loading
Loading
Loading
Loading
@@ -25,9 +25,7 @@
 
#include "../common/arm/asm.S"
 
.section .rodata
.align 4
register_init:
const register_init, align=4
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
Loading
Loading
@@ -36,9 +34,11 @@ register_init:
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
endconst
 
error_message:
const error_message
.asciz "failed to preserve register"
endconst
 
.text
 
Loading
Loading
This diff is collapsed.
Loading
Loading
@@ -63,7 +63,7 @@ while (@ARGV) {
$force_thumb = 1;
} elsif ($opt eq "-arch") {
$arch = shift;
die "unknown arch: '$arch'\n" if not exists $comments{$arch};
die "unknown arch: '$arch'\n" if not exists $canonical_arch{$arch};
} elsif ($opt eq "-as-type") {
$as_type = shift;
die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
Loading
Loading
@@ -429,7 +429,7 @@ sub parse_line {
 
sub handle_set {
my $line = $_[0];
if ($line =~ /\.set\s+(.*),\s*(.*)/) {
if ($line =~ /\.(?:set|equ)\s+(\S*)\s*,\s*(.*)/) {
$symbols{$1} = eval_expr($2);
return 1;
}
Loading
Loading
@@ -874,7 +874,7 @@ sub handle_serialized_line {
# Don't interpret e.g. bic as b<cc> with ic as conditional code
if ($cond !~ /|$arm_cond_codes/) {
# Not actually a branch
} elsif ($target =~ /(\d+)([bf])/) {
} elsif ($target =~ /^(\d+)([bf])$/) {
# The target is a local label
$line = handle_local_label($line, $1, $2);
$line =~ s/\b$instr\b/$&.w/ if $width eq "";
Loading
Loading
@@ -888,12 +888,12 @@ sub handle_serialized_line {
}
 
# ALIGN in armasm syntax is the actual number of bytes
if ($line =~ /\.align\s+(\d+)/) {
if ($line =~ /\.(?:p2)?align\s+(\d+)/) {
my $align = 1 << $1;
$line =~ s/\.align\s(\d+)/ALIGN $align/;
$line =~ s/\.(?:p2)?align\s(\d+)/ALIGN $align/;
}
# Convert gas style [r0, :128] into armasm [r0@128] alignment specification
$line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;
$line =~ s/\[([^\[,]+),?\s*:(\d+)\]/[$1\@$2]/g;
 
# armasm treats logical values {TRUE} and {FALSE} separately from
# numeric values - logical operators and values can't be intermixed
Loading
Loading
@@ -930,7 +930,7 @@ sub handle_serialized_line {
# Misc bugs/deficiencies:
# armasm seems unable to parse e.g. "vmov s0, s1" without a type
# qualifier, thus add .f32.
$line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
$line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
# armasm is unable to parse &0x - add spacing
$line =~ s/&0x/& 0x/g;
}
Loading
Loading
@@ -939,16 +939,31 @@ sub handle_serialized_line {
# Convert register post indexing to a separate add instruction.
# This converts e.g. "ldr r0, [r1], r2" into "ldr r0, [r1]",
# "add r1, r1, r2".
$line =~ s/(ldr|str)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g;
$line =~ s/((?:ldr|str)[bh]?)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g;
 
# Convert "mov pc, lr" into "bx lr", since the former only works
# for switching from arm to thumb (and only in armv7), but not
# from thumb to arm.
s/mov\s*pc\s*,\s*lr/bx lr/g;
 
# Convert stmdb/ldmia with only one register into a plain str/ldr with post-increment/decrement
$line =~ s/stmdb\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g;
$line =~ s/ldmia\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g;
# Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement.
# Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too.
$line =~ s/stm(?:db|fd)\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g;
$line =~ s/ldm(?:ia|fd)?\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g;
# Convert muls into mul+cmp
$line =~ s/muls\s+(\w+),\s*(\w+)\,\s*(\w+)/mul $1, $2, $3\n\tcmp $1, #0/g;
# Convert "and r0, sp, #xx" into "mov r0, sp", "and r0, r0, #xx"
$line =~ s/and\s+(\w+),\s*(sp|r13)\,\s*#(\w+)/mov $1, $2\n\tand $1, $1, #$3/g;
# Convert "ldr r0, [r0, r1, lsl #6]" where the shift is >3 (which
# can't be handled in thumb) into "add r0, r0, r1, lsl #6",
# "ldr r0, [r0]", for the special case where the same address is
# used as base and target for the ldr.
if ($line =~ /(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/ and $4 > 3) {
$line =~ s/(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/add $2, $2, $3, lsl #$4\n\t$1 $2, [$2]/;
}
 
$line =~ s/\.arm/.thumb/x;
}
Loading
Loading
@@ -978,6 +993,9 @@ sub handle_serialized_line {
$line =~ s/\.int/.long/x;
$line =~ s/\.float/.single/x;
}
if ($as_type eq "apple-gas") {
$line =~ s/vmrs\s+APSR_nzcv/fmrx r15/x;
}
if ($as_type eq "armasm") {
$line =~ s/\.global/EXPORT/x;
$line =~ s/\.int/dcd/x;
Loading
Loading
@@ -986,11 +1004,15 @@ sub handle_serialized_line {
$line =~ s/\.word/dcd/x;
$line =~ s/\.short/dcw/x;
$line =~ s/\.byte/dcb/x;
$line =~ s/\.quad/dcq/x;
$line =~ s/\.ascii/dcb/x;
$line =~ s/\.asciz(.*)$/dcb\1,0/x;
$line =~ s/\.thumb/THUMB/x;
$line =~ s/\.arm/ARM/x;
# The alignment in AREA is the power of two, just as .align in gas
$line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
$line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/;
$line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;
$line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/;
 
$line =~ s/fmxr/vmsr/;
$line =~ s/fmrx/vmrs/;
Loading
Loading
Loading
Loading
@@ -23,6 +23,12 @@ if command -v cygpath >/dev/null 2>&1 ; then
IFS='
'
deps="$(cygpath -u -- $deps)"
elif grep -q 'Microsoft' /proc/sys/kernel/osrelease 2>/dev/null ; then
# Running under WSL. We don't have access to cygpath but since the Windows
# file system resides under "/mnt/<drive_letter>/" we can simply replace
# "C:" with "/mnt/c". This command uses a GNU extension to sed but that's
# available on WSL so we don't need to limit ourselves by what POSIX says.
deps="$(printf '%s' "$deps" | sed 's/^\([a-zA-Z]\):/\/mnt\/\L\1/')"
fi
 
# Escape characters as required to create valid Makefile file names
Loading
Loading
Loading
Loading
@@ -420,47 +420,47 @@ static char *stringify_names( char *buf, const char * const names[] )
return buf;
}
 
#define INDENT " "
#define INDENT_LEN 32 // strlen( INDENT )
#define SEPARATOR ", "
#define SEPARATOR_LEN 2 // strlen( SEPARATOR )
/* Emit a single colorspace name as part of a wrapped, comma-separated list.
 *
 * name     - the name to print; a NULL name is silently ignored (nothing is
 *            printed and *line_len is left untouched).
 * line_len - in/out: running column count of the current output line. It is
 *            reset to INDENT_LEN when the line is wrapped and advanced by the
 *            length of everything printed afterwards.
 * last     - non-zero when this is the final entry, in which case no trailing
 *            SEPARATOR is written.
 *
 * The line is wrapped (newline + INDENT) before the name whenever appending
 * the name would leave fewer than SEPARATOR_LEN columns free out of 80. */
static void print_csp_name_internal( const char *name, size_t *line_len, int last )
{
    if( !name )
        return;

    const size_t width = strlen( name );
    size_t column = *line_len;

    /* Wrap first so the name (and a possible separator) stays within 80 columns. */
    if( column + width > (80 - SEPARATOR_LEN) )
    {
        printf( "\n" INDENT );
        column = INDENT_LEN;
    }

    printf( "%s", name );
    column += width;

    if( !last )
    {
        printf( SEPARATOR );
        column += SEPARATOR_LEN;
    }

    /* Publish the updated column back to the caller. */
    *line_len = column;
}
static void print_csp_names( int longhelp )
{
if( longhelp < 2 )
return;
# define INDENT " "
printf( " - valid csps for `raw' demuxer:\n" );
printf( INDENT );
size_t line_len = INDENT_LEN;
for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
{
if( x264_cli_csps[i].name )
{
printf( "%s", x264_cli_csps[i].name );
if( i+1 < X264_CSP_CLI_MAX )
printf( ", " );
}
}
print_csp_name_internal( x264_cli_csps[i].name, &line_len, i == X264_CSP_CLI_MAX-1 );
#if HAVE_LAVF
printf( "\n" );
printf( " - valid csps for `lavf' demuxer:\n" );
printf( INDENT );
size_t line_len = strlen( INDENT );
line_len = INDENT_LEN;
for( enum AVPixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ )
{
const char *pfname = av_get_pix_fmt_name( i );
if( pfname )
{
size_t name_len = strlen( pfname );
if( line_len + name_len > (80 - strlen( ", " )) )
{
printf( "\n" INDENT );
line_len = strlen( INDENT );
}
printf( "%s", pfname );
line_len += name_len;
if( i+1 < AV_PIX_FMT_NB )
{
printf( ", " );
line_len += 2;
}
}
}
print_csp_name_internal( av_get_pix_fmt_name( i ), &line_len, i == AV_PIX_FMT_NB-1 );
#endif
printf( "\n" );
}
Loading
Loading
@@ -636,7 +636,7 @@ static void help( x264_param_t *defaults, int longhelp )
" - grain (psy tuning):\n"
" --aq-strength 0.5 --no-dct-decimate\n"
" --deadzone-inter 6 --deadzone-intra 6\n"
" --deblock -2:-2 --ipratio 1.1 \n"
" --deblock -2:-2 --ipratio 1.1\n"
" --pbratio 1.1 --psy-rd <unset>:0.25\n"
" --qcomp 0.8\n"
" - stillimage (psy tuning):\n"
Loading
Loading
Loading
Loading
@@ -45,7 +45,7 @@ extern "C" {
 
#include "x264_config.h"
 
#define X264_BUILD 148
#define X264_BUILD 152
 
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
Loading
Loading
@@ -119,39 +119,38 @@ typedef struct x264_nal_t
/* CPU flags */
 
/* x86 */
#define X264_CPU_CMOV 0x0000001
#define X264_CPU_MMX 0x0000002
#define X264_CPU_MMX2 0x0000004 /* MMX2 aka MMXEXT aka ISSE */
#define X264_CPU_MMXEXT X264_CPU_MMX2
#define X264_CPU_SSE 0x0000008
#define X264_CPU_SSE2 0x0000010
#define X264_CPU_SSE3 0x0000020
#define X264_CPU_SSSE3 0x0000040
#define X264_CPU_SSE4 0x0000080 /* SSE4.1 */
#define X264_CPU_SSE42 0x0000100 /* SSE4.2 */
#define X264_CPU_LZCNT 0x0000200 /* Phenom support for "leading zero count" instruction. */
#define X264_CPU_AVX 0x0000400 /* AVX support: requires OS support even if YMM registers aren't used. */
#define X264_CPU_XOP 0x0000800 /* AMD XOP */
#define X264_CPU_FMA4 0x0001000 /* AMD FMA4 */
#define X264_CPU_FMA3 0x0002000 /* FMA3 */
#define X264_CPU_AVX2 0x0004000 /* AVX2 */
#define X264_CPU_BMI1 0x0008000 /* BMI1 */
#define X264_CPU_BMI2 0x0010000 /* BMI2 */
#define X264_CPU_MMX (1<<0)
#define X264_CPU_MMX2 (1<<1) /* MMX2 aka MMXEXT aka ISSE */
#define X264_CPU_MMXEXT X264_CPU_MMX2
#define X264_CPU_SSE (1<<2)
#define X264_CPU_SSE2 (1<<3)
#define X264_CPU_LZCNT (1<<4)
#define X264_CPU_SSE3 (1<<5)
#define X264_CPU_SSSE3 (1<<6)
#define X264_CPU_SSE4 (1<<7) /* SSE4.1 */
#define X264_CPU_SSE42 (1<<8) /* SSE4.2 */
#define X264_CPU_AVX (1<<9) /* Requires OS support even if YMM registers aren't used */
#define X264_CPU_XOP (1<<10) /* AMD XOP */
#define X264_CPU_FMA4 (1<<11) /* AMD FMA4 */
#define X264_CPU_FMA3 (1<<12)
#define X264_CPU_BMI1 (1<<13)
#define X264_CPU_BMI2 (1<<14)
#define X264_CPU_AVX2 (1<<15)
#define X264_CPU_AVX512 (1<<16) /* AVX-512 {F, CD, BW, DQ, VL}, requires OS support */
/* x86 modifiers */
#define X264_CPU_CACHELINE_32 0x0020000 /* avoid memory loads that span the border between two cachelines */
#define X264_CPU_CACHELINE_64 0x0040000 /* 32/64 is the size of a cacheline in bytes */
#define X264_CPU_SSE2_IS_SLOW 0x0080000 /* avoid most SSE2 functions on Athlon64 */
#define X264_CPU_SSE2_IS_FAST 0x0100000 /* a few functions are only faster on Core2 and Phenom */
#define X264_CPU_SLOW_SHUFFLE 0x0200000 /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
#define X264_CPU_STACK_MOD4 0x0400000 /* if stack is only mod4 and not mod16 */
#define X264_CPU_SLOW_CTZ 0x0800000 /* BSR/BSF x86 instructions are really slow on some CPUs */
#define X264_CPU_SLOW_ATOM 0x1000000 /* The Atom is terrible: slow SSE unaligned loads, slow
#define X264_CPU_CACHELINE_32 (1<<17) /* avoid memory loads that span the border between two cachelines */
#define X264_CPU_CACHELINE_64 (1<<18) /* 32/64 is the size of a cacheline in bytes */
#define X264_CPU_SSE2_IS_SLOW (1<<19) /* avoid most SSE2 functions on Athlon64 */
#define X264_CPU_SSE2_IS_FAST (1<<20) /* a few functions are only faster on Core2 and Phenom */
#define X264_CPU_SLOW_SHUFFLE (1<<21) /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
#define X264_CPU_STACK_MOD4 (1<<22) /* if stack is only mod4 and not mod16 */
#define X264_CPU_SLOW_ATOM (1<<23) /* The Atom is terrible: slow SSE unaligned loads, slow
* SIMD multiplies, slow SIMD variable shifts, slow pshufb,
* cacheline split penalties -- gather everything here that
* isn't shared by other CPUs to avoid making half a dozen
* new SLOW flags. */
#define X264_CPU_SLOW_PSHUFB 0x2000000 /* such as on the Intel Atom */
#define X264_CPU_SLOW_PALIGNR 0x4000000 /* such as on the AMD Bobcat */
#define X264_CPU_SLOW_PSHUFB (1<<24) /* such as on the Intel Atom */
#define X264_CPU_SLOW_PALIGNR (1<<25) /* such as on the AMD Bobcat */
 
/* PowerPC */
#define X264_CPU_ALTIVEC 0x0000001
Loading
Loading
@@ -227,13 +226,15 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_YUYV 0x0008 /* yuyv 4:2:2 packed */
#define X264_CSP_UYVY 0x0009 /* uyvy 4:2:2 packed */
#define X264_CSP_V210 0x000a /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x000b /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000c /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000d /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000e /* packed bgr 32bits */
#define X264_CSP_RGB 0x000f /* packed rgb 24bits */
#define X264_CSP_MAX 0x0010 /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
 
Loading
Loading
@@ -563,19 +564,19 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
 
typedef struct x264_level_t
{
int level_idc;
int mbps; /* max macroblock processing rate (macroblocks/sec) */
int frame_size; /* max frame size (macroblocks) */
int dpb; /* max decoded picture buffer (mbs) */
int bitrate; /* max bitrate (kbit/sec) */
int cpb; /* max vbv buffer (kbit) */
int mv_range; /* max vertical mv component range (pixels) */
int mvs_per_2mb; /* max mvs per 2 consecutive mbs. */
int slice_rate; /* ?? */
int mincr; /* min compression ratio */
int bipred8x8; /* limit bipred to >=8x8 */
int direct8x8; /* limit b_direct to >=8x8 */
int frame_only; /* forbid interlacing */
uint8_t level_idc;
uint32_t mbps; /* max macroblock processing rate (macroblocks/sec) */
uint32_t frame_size; /* max frame size (macroblocks) */
uint32_t dpb; /* max decoded picture buffer (mbs) */
uint32_t bitrate; /* max bitrate (kbit/sec) */
uint32_t cpb; /* max vbv buffer (kbit) */
uint16_t mv_range; /* max vertical mv component range (pixels) */
uint8_t mvs_per_2mb; /* max mvs per 2 consecutive mbs. */
uint8_t slice_rate; /* ?? */
uint8_t mincr; /* min compression ratio */
uint8_t bipred8x8; /* limit bipred to >=8x8 */
uint8_t direct8x8; /* limit b_direct to >=8x8 */
uint8_t frame_only; /* forbid interlacing */
} x264_level_t;
 
/* all of the levels defined in the standard, terminated by .level_idc=0 */
Loading
Loading
Loading
Loading
@@ -3,4 +3,4 @@
#define X264_INTERLACED 1
#define X264_CHROMA_FORMAT 0
#define X264_VERSION ""
#define X264_POINTVER "0.148.x"
#define X264_POINTVER "0.152.x"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment