Merge pull request #531 from tylerjroach/h264

Strip x264 to resolve licensing issues.

Merge pull request #531 from tylerjroach/h264
dd94f023 · Leo Ma · GitHub · b7b495ab · 84408c34 · b7b495ab
Commit dd94f023 authored 7 years ago by Leo Ma Committed by GitHub 7 years ago
--- a/library/src/main/cpp/libx264/common/aarch64/deblock-a.S
+++ b/library/src/main/cpp/libx264/common/aarch64/deblock-a.S
--- a/library/src/main/cpp/libx264/common/aarch64/mc-a.S
+++ b/library/src/main/cpp/libx264/common/aarch64/mc-a.S
--- a/library/src/main/cpp/libx264/common/aarch64/mc-c.c
+++ b/library/src/main/cpp/libx264/common/aarch64/mc-c.c
-/*****************************************************************************
- * mc-c.c: aarch64 motion compensation
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "common/common.h"
-#include "mc.h"
-/* Prototypes only: none of the routines declared in this group has a body in
- * this file. NOTE(review): they are presumably implemented in the accompanying
- * aarch64 assembly (mc-a.S) -- confirm. */
-void x264_prefetch_ref_aarch64( uint8_t *, intptr_t, int );
-void x264_prefetch_fenc_420_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_prefetch_fenc_422_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-/* Installed as pf->memcpy_aligned / pf->memzero_aligned by x264_mc_init_aarch64. */
-void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
-void x264_memzero_aligned_neon( void *dst, size_t n );
-/* Fixed-size averaging routines, installed into pf->avg[] by x264_mc_init_aarch64. */
-void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-/* Width-specific two-source averaging routines, dispatched through x264_pixel_avg_wtab_neon. */
-void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-/* Plane copy and (de)interleave routines. The two deinterleave routines are
- * installed directly by x264_mc_init_aarch64; the *_core_* routines are not.
- * NOTE(review): the *_core_* routines are presumably wrapped by the
- * PLANE_COPY* / PLANE_INTERLEAVE macros invoked later in this file -- confirm. */
-void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
-                                pixel *src, intptr_t i_src, int w, int h );
-void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
-                                     pixel *src, intptr_t i_src, int w, int h );
-void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
-                                         pixel *dstv, intptr_t i_dstv,
-                                         pixel *src,  intptr_t i_src, int w, int h );
-void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
-                                            pixel *dstb, intptr_t i_dstb,
-                                            pixel *dstc, intptr_t i_dstc,
-                                            pixel *src,  intptr_t i_src, int pw, int w, int h );
-void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
-                                           pixel *srcu, intptr_t i_srcu,
-                                           pixel *srcv, intptr_t i_srcv, int w, int h );
-/* Chroma load/store routines, installed by x264_mc_init_aarch64. */
-void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
-void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
-void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
-/* MC_WEIGHT(func): for the name suffix func, declares the w4/w8/w16/w20
- * weighting routines and defines a static six-entry dispatch table named
- * x264_mc<func>_wtab_neon. The table is indexed with (i_width>>2) by
- * mc_luma_neon and get_ref_neon through weight->weightfn. Slots 0 and 1 both
- * hold the w4 routine and slots 3 and 4 both hold the w16 routine. */
-#define MC_WEIGHT(func)\
-void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-\
-static void (* x264_mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\
-{\
-    x264_mc_weight_w4##func##_neon,\
-    x264_mc_weight_w4##func##_neon,\
-    x264_mc_weight_w8##func##_neon,\
-    x264_mc_weight_w16##func##_neon,\
-    x264_mc_weight_w16##func##_neon,\
-    x264_mc_weight_w20##func##_neon,\
-};
-/* Instantiate the plain, _nodenom, _offsetadd and _offsetsub variants. */
-MC_WEIGHT()
-MC_WEIGHT(_nodenom)
-MC_WEIGHT(_offsetadd)
-MC_WEIGHT(_offsetsub)
-/* Further prototypes without bodies in this file. The three copy routines
- * are dispatched through x264_mc_copy_wtab_neon and installed into pf->copy[];
- * the rest are installed into pf by x264_mc_init_aarch64. */
-void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-
-void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
-void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
-void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
-void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
-void x264_integral_init8v_neon( uint16_t *, intptr_t );
-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
-
-void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
-
-void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
-void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
-
-#if !HIGH_BIT_DEPTH
-static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
-{   /* Select the NEON weight dispatch table for w and store it in
-     * w->weightfn. The parameter h is not referenced in this body. */
-    if( w->i_scale == 1<<w->i_denom )   // scale is exactly 1<<denom: use the offset-only tables
-    {
-        if( w->i_offset < 0 )
-        {
-            w->weightfn = x264_mc_offsetsub_wtab_neon;
-            w->cachea[0] = -w->i_offset;   // cache the negated (positive) offset
-        }
-        else
-        {
-            w->weightfn = x264_mc_offsetadd_wtab_neon;
-            w->cachea[0] = w->i_offset;
-        }
-    }
-    else if( !w->i_denom )   // denom is zero
-        w->weightfn = x264_mc_nodenom_wtab_neon;
-    else
-        w->weightfn = x264_mc_wtab_neon;
-}
-/* Two-source averaging routines indexed by (i_width>>2), as used by
- * mc_luma_neon and get_ref_neon. Slot 0 is NULL. */
-static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
-{
-    NULL,
-    x264_pixel_avg2_w4_neon,
-    x264_pixel_avg2_w8_neon,
-    x264_pixel_avg2_w16_neon,   // no slower than w12, so no point in a separate function
-    x264_pixel_avg2_w16_neon,
-    x264_pixel_avg2_w20_neon,
-};
-/* Copy routines indexed by (i_width>>2), as used by mc_luma_neon.
- * Slots 0 and 3 are NULL. */
-static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
-{
-    NULL,
-    x264_mc_copy_w4_neon,
-    x264_mc_copy_w8_neon,
-    NULL,
-    x264_mc_copy_w16_neon,
-};
-/* Write the i_width x i_height luma prediction for motion vector (mvx,mvy)
- * into dst.
- *   dst, i_dst_stride : output buffer and its stride
- *   src[4]            : four source pointers, selected through the
- *                       x264_hpel_ref0 / x264_hpel_ref1 lookup tables
- *   i_src_stride      : stride used for every source pointer
- *   mvx, mvy          : motion vector; the low two bits of each component
- *                       form qpel_idx, the remaining bits the integer offset
- *   weight            : when weight->weightfn is non-NULL it is applied to
- *                       the result
- * Every width-specific routine is dispatched with index (i_width>>2). */
-static void mc_luma_neon( uint8_t *dst,    intptr_t i_dst_stride,
-                          uint8_t *src[4], intptr_t i_src_stride,
-                          int mvx, int mvy,
-                          int i_width, int i_height, const x264_weight_t *weight )
-{
-    int qpel_idx = ((mvy&3)<<2) + (mvx&3);   // bits 3..2 = mvy&3, bits 1..0 = mvx&3
-    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
-    if( (mvy&3) == 3 )             // explicit if() to force conditional add
-        src1 += i_src_stride;
-
-    if( qpel_idx & 5 ) /* qpel interpolation needed: bit 0 of mvx or of mvy is set */
-    {
-        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
-                dst, i_dst_stride, src1, i_src_stride,
-                src2, i_height );
-        if( weight->weightfn )   // weight the averaged block in place (dst is source and destination)
-            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
-    }
-    else if( weight->weightfn )   // no averaging: weight straight from src1 into dst
-        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
-    else                          // neither averaging nor weighting: plain copy
-        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
-}
-/* Return a pointer to the i_width x i_height luma prediction for motion
- * vector (mvx,mvy). Parameters are the same as for mc_luma_neon, except that
- * i_dst_stride is passed by pointer.
- * When averaging or weighting is needed, the block is written to dst (using
- * the stride in *i_dst_stride) and dst is returned. Otherwise nothing is
- * written: *i_dst_stride is overwritten with i_src_stride and a pointer into
- * the source data is returned. */
-static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
-                              uint8_t *src[4], intptr_t i_src_stride,
-                              int mvx, int mvy,
-                              int i_width, int i_height, const x264_weight_t *weight )
-{
-    int qpel_idx = ((mvy&3)<<2) + (mvx&3);   // bits 3..2 = mvy&3, bits 1..0 = mvx&3
-    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
-    if( (mvy&3) == 3 )             // explicit if() to force conditional add
-        src1 += i_src_stride;
-
-    if( qpel_idx & 5 ) /* qpel interpolation needed: bit 0 of mvx or of mvy is set */
-    {
-        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
-                dst, *i_dst_stride, src1, i_src_stride,
-                src2, i_height );
-        if( weight->weightfn )   // weight the averaged block in place
-            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
-        return dst;
-    }
-    else if( weight->weightfn )
-    {
-        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
-        return dst;
-    }
-    else
-    {
-        *i_dst_stride = i_src_stride;   // caller must use the source stride with the returned pointer
-        return src1;
-    }
-}
-/* Prototype only; installed as pf->hpel_filter by x264_mc_init_aarch64. */
-void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
-                            uint8_t *src, intptr_t stride, int width,
-                            int height, int16_t *buf );
-/* PLANE_COPY, PLANE_COPY_SWAP, PLANE_INTERLEAVE and PROPAGATE_LIST are macros
- * defined outside this file. NOTE(review): they presumably generate the
- * x264_plane_copy_neon, x264_plane_copy_swap_neon,
- * x264_plane_copy_interleave_neon and x264_mbtree_propagate_list_neon
- * functions that x264_mc_init_aarch64 installs -- confirm in the header
- * that defines them. */
-PLANE_COPY(16, neon)
-PLANE_COPY_SWAP(16, neon)
-PLANE_INTERLEAVE(neon)
-#endif // !HIGH_BIT_DEPTH
-
-PROPAGATE_LIST(neon)
-/* Fill the motion-compensation function table pf with the aarch64
- * implementations permitted by the cpu flag word.
- * The whole body is compiled only when HIGH_BIT_DEPTH is 0; otherwise the
- * function leaves pf untouched. The prefetch entries require
- * X264_CPU_ARMV8; every other entry requires X264_CPU_NEON. */
-void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
-{
-#if !HIGH_BIT_DEPTH
-    if( cpu&X264_CPU_ARMV8 )
-    {
-        pf->prefetch_fenc_420 = x264_prefetch_fenc_420_aarch64;
-        pf->prefetch_fenc_422 = x264_prefetch_fenc_422_aarch64;
-        pf->prefetch_ref      = x264_prefetch_ref_aarch64;
-    }
-    /* Everything below needs NEON. */
-    if( !(cpu&X264_CPU_NEON) )
-        return;
-
-    pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
-    pf->copy[PIXEL_16x16]    = x264_mc_copy_w16_neon;
-    pf->copy[PIXEL_8x8]      = x264_mc_copy_w8_neon;
-    pf->copy[PIXEL_4x4]      = x264_mc_copy_w4_neon;
-
-    pf->plane_copy                  = x264_plane_copy_neon;
-    pf->plane_copy_swap             = x264_plane_copy_swap_neon;
-    pf->plane_copy_deinterleave     = x264_plane_copy_deinterleave_neon;
-    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
-    pf->plane_copy_interleave       = x264_plane_copy_interleave_neon;
-
-    pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
-    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
-    pf->store_interleave_chroma       = x264_store_interleave_chroma_neon;
-
-    pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
-    pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
-    pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;
-    pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_neon;
-    pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_neon;
-    pf->avg[PIXEL_4x16]  = x264_pixel_avg_4x16_neon;
-    pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_neon;
-    pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
-    pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
-    /* The _nodenom table is not installed here; within this file it is only
-     * selected by x264_weight_cache_neon. */
-    pf->weight       = x264_mc_wtab_neon;
-    pf->offsetadd    = x264_mc_offsetadd_wtab_neon;
-    pf->offsetsub    = x264_mc_offsetsub_wtab_neon;
-    pf->weight_cache = x264_weight_cache_neon;
-
-    pf->mc_chroma = x264_mc_chroma_neon;
-    pf->mc_luma = mc_luma_neon;
-    pf->get_ref = get_ref_neon;
-    pf->hpel_filter = x264_hpel_filter_neon;
-    pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
-
-    pf->integral_init4h = x264_integral_init4h_neon;
-    pf->integral_init8h = x264_integral_init8h_neon;
-    pf->integral_init4v = x264_integral_init4v_neon;
-    pf->integral_init8v = x264_integral_init8v_neon;
-
-    pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon;
-    pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_neon;
-    pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_neon;
-
-    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
-    pf->memzero_aligned = x264_memzero_aligned_neon;
-#endif // !HIGH_BIT_DEPTH
-}
--- a/library/src/main/cpp/libx264/common/aarch64/mc.h
+++ b/library/src/main/cpp/libx264/common/aarch64/mc.h
-/*****************************************************************************
- * mc.h: aarch64 motion compensation
- *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
- *
- * Authors: Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-/* Public entry point of mc-c.c, behind an include guard. */
-#ifndef X264_AARCH64_MC_H
-#define X264_AARCH64_MC_H
-/* Fills pf with the aarch64 routines allowed by the cpu flag word. */
-void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf );
-
-#endif
--- a/library/src/main/cpp/libx264/common/aarch64/pixel-a.S
+++ b/library/src/main/cpp/libx264/common/aarch64/pixel-a.S
--- a/library/src/main/cpp/libx264/common/aarch64/pixel.h
+++ b/library/src/main/cpp/libx264/common/aarch64/pixel.h
-/*****************************************************************************
- * pixel.h: aarch64 pixel metrics
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-/* Prototypes for the aarch64 NEON pixel metric routines. */
-#ifndef X264_AARCH64_PIXEL_H
-#define X264_AARCH64_PIXEL_H
-/* DECL_PIXELS(ret, name, suffix, args) declares
- * x264_pixel_<name>_<W>x<H>_<suffix> for the eight block sizes 16x16, 16x8,
- * 8x16, 8x8, 8x4, 4x16, 4x8 and 4x4, each returning ret and taking args.
- * DECL_X1(name, suffix) uses it for int-returning routines that take two
- * (uint8_t *, intptr_t) pairs. */
-#define DECL_PIXELS( ret, name, suffix, args ) \
-    ret x264_pixel_##name##_16x16_##suffix args;\
-    ret x264_pixel_##name##_16x8_##suffix args;\
-    ret x264_pixel_##name##_8x16_##suffix args;\
-    ret x264_pixel_##name##_8x8_##suffix args;\
-    ret x264_pixel_##name##_8x4_##suffix args;\
-    ret x264_pixel_##name##_4x16_##suffix args;\
-    ret x264_pixel_##name##_4x8_##suffix args;\
-    ret x264_pixel_##name##_4x4_##suffix args;\
-
-#define DECL_X1( name, suffix ) \
-    DECL_PIXELS( int, name, suffix, ( uint8_t *, intptr_t, uint8_t *, intptr_t ) )
-/* DECL_X4(name, suffix) declares the void <name>_x3 and <name>_x4 families,
- * which take four or five uint8_t pointers, one intptr_t and an int pointer. */
-#define DECL_X4( name, suffix ) \
-    DECL_PIXELS( void, name##_x3, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )\
-    DECL_PIXELS( void, name##_x4, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )
-/* Declare the sad, sad_x3/sad_x4, satd and ssd families with the neon suffix. */
-DECL_X1( sad, neon )
-DECL_X4( sad, neon )
-DECL_X1( satd, neon )
-DECL_X1( ssd, neon )
-
-/* Individually declared NEON pixel routines. Prototypes only.
- * NOTE(review): the bodies are presumably in pixel-a.S -- confirm. */
-void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * );
-
-int x264_pixel_vsad_neon( uint8_t *, intptr_t, int );
-
-int x264_pixel_sa8d_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
-int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
-uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
-
-uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
-uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
-uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
-int x264_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * );
-int x264_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * );
-
-uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
-uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
-uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t );
-uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t );
-
-void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
-                                      const uint8_t *, intptr_t,
-                                      int sums[2][4] );
-float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
-
-int x264_pixel_asd8_neon( uint8_t *, intptr_t,  uint8_t *, intptr_t, int );
-
-#endif
--- a/library/src/main/cpp/libx264/common/aarch64/predict-a.S
+++ b/library/src/main/cpp/libx264/common/aarch64/predict-a.S
--- a/library/src/main/cpp/libx264/common/aarch64/predict-c.c
+++ b/library/src/main/cpp/libx264/common/aarch64/predict-c.c
-/*****************************************************************************
- * predict.c: aarch64 intra prediction
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "common/common.h"
-#include "predict.h"
-#include "pixel.h"
-/* Prototypes for predictors that are used only by the init functions in this
- * file; the remaining predictors are declared in predict.h. No bodies appear
- * here. NOTE(review): presumably implemented in predict-a.S -- confirm. */
-void x264_predict_4x4_dc_top_neon( uint8_t *src );
-void x264_predict_4x4_ddr_neon( uint8_t *src );
-void x264_predict_4x4_ddl_neon( uint8_t *src );
-
-void x264_predict_8x8c_dc_top_neon( uint8_t *src );
-void x264_predict_8x8c_dc_left_neon( uint8_t *src );
-void x264_predict_8x8c_p_neon( uint8_t *src );
-
-void x264_predict_8x16c_dc_left_neon( uint8_t *src );
-void x264_predict_8x16c_dc_top_neon( uint8_t *src );
-void x264_predict_8x16c_p_neon( uint8_t *src );
-
-void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
-
-void x264_predict_16x16_dc_top_neon( uint8_t *src );
-void x264_predict_16x16_dc_left_neon( uint8_t *src );
-void x264_predict_16x16_p_neon( uint8_t *src );
-/* Install the aarch64 4x4 intra predictors into pf[], using the I_PRED_4x4_*
- * indices. The body is compiled only when HIGH_BIT_DEPTH is 0; otherwise pf
- * is left untouched. The H and V entries require X264_CPU_ARMV8; the DC,
- * DC_TOP, DDL and DDR entries require X264_CPU_NEON. */
-void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] )
-{
-#if !HIGH_BIT_DEPTH
-    if( cpu&X264_CPU_ARMV8 )
-    {
-        pf[I_PRED_4x4_H]   = x264_predict_4x4_h_aarch64;
-        pf[I_PRED_4x4_V]   = x264_predict_4x4_v_aarch64;
-    }
-
-    if( cpu&X264_CPU_NEON )
-    {
-        pf[I_PRED_4x4_DC]     = x264_predict_4x4_dc_neon;
-        pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_neon;
-        pf[I_PRED_4x4_DDL]    = x264_predict_4x4_ddl_neon;
-        pf[I_PRED_4x4_DDR]    = x264_predict_4x4_ddr_neon;
-    }
-#endif // !HIGH_BIT_DEPTH
-}
-/* Install the aarch64 8x8 chroma intra predictors into pf[], using the
- * I_PRED_CHROMA_* indices. The body is compiled only when HIGH_BIT_DEPTH is
- * 0. The V entry requires X264_CPU_ARMV8; the DC, DC_TOP, DC_LEFT, H and P
- * entries require X264_CPU_NEON. */
-void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] )
-{
-#if !HIGH_BIT_DEPTH
-    if( cpu&X264_CPU_ARMV8 )
-    {
-        pf[I_PRED_CHROMA_V]   = x264_predict_8x8c_v_aarch64;
-    }
-
-    if( !(cpu&X264_CPU_NEON) )   // the remaining entries need NEON
-        return;
-
-    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_neon;
-    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_neon;
-    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
-    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_neon;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_neon;
-#endif // !HIGH_BIT_DEPTH
-}
-
-/* Install the NEON 8x16 chroma intra predictors (V, H, DC, P, DC_LEFT,
- * DC_TOP) into pf[]. Does nothing without X264_CPU_NEON. The assignments are
- * compiled only when HIGH_BIT_DEPTH is 0, so with HIGH_BIT_DEPTH set the
- * function never modifies pf. */
-void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] )
-{
-    if( !(cpu&X264_CPU_NEON) )
-        return;
-
-#if !HIGH_BIT_DEPTH
-    pf[I_PRED_CHROMA_V ]     = x264_predict_8x16c_v_neon;
-    pf[I_PRED_CHROMA_H ]     = x264_predict_8x16c_h_neon;
-    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_neon;
-    pf[I_PRED_CHROMA_P ]     = x264_predict_8x16c_p_neon;
-    pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x16c_dc_left_neon;
-    pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x16c_dc_top_neon;
-#endif // !HIGH_BIT_DEPTH
-}
-/* Install the NEON 8x8 intra predictors (DDL, DDR, VL, VR, DC, H, HD, HU, V)
- * into pf[]. Does nothing without X264_CPU_NEON. The assignments are
- * compiled only when HIGH_BIT_DEPTH is 0. The predict_filter parameter is
- * not referenced in this body. */
-void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
-{
-    if( !(cpu&X264_CPU_NEON) )
-        return;
-
-#if !HIGH_BIT_DEPTH
-    pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_neon;
-    pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_neon;
-    pf[I_PRED_8x8_VL]  = x264_predict_8x8_vl_neon;
-    pf[I_PRED_8x8_VR]  = x264_predict_8x8_vr_neon;
-    pf[I_PRED_8x8_DC]  = x264_predict_8x8_dc_neon;
-    pf[I_PRED_8x8_H]   = x264_predict_8x8_h_neon;
-    pf[I_PRED_8x8_HD]  = x264_predict_8x8_hd_neon;
-    pf[I_PRED_8x8_HU]  = x264_predict_8x8_hu_neon;
-    pf[I_PRED_8x8_V]   = x264_predict_8x8_v_neon;
-#endif // !HIGH_BIT_DEPTH
-}
-/* Install the NEON 16x16 intra predictors (DC, DC_TOP, DC_LEFT, H, V, P)
- * into pf[]. Does nothing without X264_CPU_NEON. The assignments are
- * compiled only when HIGH_BIT_DEPTH is 0. */
-void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] )
-{
-    if( !(cpu&X264_CPU_NEON) )
-        return;
-
-#if !HIGH_BIT_DEPTH
-    pf[I_PRED_16x16_DC ]    = x264_predict_16x16_dc_neon;
-    pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
-    pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
-    pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_neon;
-    pf[I_PRED_16x16_V ]     = x264_predict_16x16_v_neon;
-    pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_neon;
-#endif // !HIGH_BIT_DEPTH
-}
--- a/library/src/main/cpp/libx264/common/aarch64/predict.h
+++ b/library/src/main/cpp/libx264/common/aarch64/predict.h
-/*****************************************************************************
- * predict.h: aarch64 intra prediction
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-/* Declarations for the aarch64 intra predictors and their init functions. */
-#ifndef X264_AARCH64_PREDICT_H
-#define X264_AARCH64_PREDICT_H
-/* Predictors carrying the _aarch64 suffix; predict-c.c installs them under
- * X264_CPU_ARMV8 rather than X264_CPU_NEON. */
-void x264_predict_4x4_h_aarch64( uint8_t *src );
-void x264_predict_4x4_v_aarch64( uint8_t *src );
-void x264_predict_8x8c_v_aarch64( uint8_t *src );
-
-// for the merged 4x4 intra sad/satd which expects unified suffix
-#define x264_predict_4x4_h_neon x264_predict_4x4_h_aarch64
-#define x264_predict_4x4_v_neon x264_predict_4x4_v_aarch64
-#define x264_predict_8x8c_v_neon x264_predict_8x8c_v_aarch64
-/* NEON predictors. The 8x8 variants take an extra 36-byte edge array. */
-void x264_predict_4x4_dc_neon( uint8_t *src );
-void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8c_dc_neon( uint8_t *src );
-void x264_predict_8x8c_h_neon( uint8_t *src );
-void x264_predict_8x16c_v_neon( uint8_t *src );
-void x264_predict_8x16c_h_neon( uint8_t *src );
-void x264_predict_8x16c_dc_neon( uint8_t *src );
-void x264_predict_16x16_v_neon( uint8_t *src );
-void x264_predict_16x16_h_neon( uint8_t *src );
-void x264_predict_16x16_dc_neon( uint8_t *src );
-/* Init functions defined in predict-c.c; each fills pf[] according to cpu. */
-void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] );
-void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
-void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] );
-void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] );
-void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] );
-
-#endif /* X264_AARCH64_PREDICT_H */
--- a/library/src/main/cpp/libx264/common/aarch64/quant-a.S
+++ b/library/src/main/cpp/libx264/common/aarch64/quant-a.S
-/****************************************************************************
- * quant.S: arm quantization and level-run
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *          Martin Storsjo <martin@martin.st>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "asm.S"
-/* QUANT_TWO: quantize 16 coefficients held in two vectors and store them.
- * On entry v16/v17 hold the signed input coefficients and v18/v19 their
- * absolute values (every caller in this file prepares them with ld1 + abs).
- * bias0/bias1 are added to v18/v19, mf0_1/mf2_3 are the multipliers, and
- * mask receives the OR of the two result vectors. Overwrites v16-v23 and
- * advances x0 by 32 bytes through the post-indexed store. */
-.macro QUANT_TWO bias0 bias1 mf0_1 mf2_3 mask
-    add         v18.8h, v18.8h, \bias0          // abs(coef) + bias
-    add         v19.8h, v19.8h, \bias1
-    umull       v20.4s, v18.4h, \mf0_1\().4h    // widening multiply by mf, low four lanes
-    umull2      v21.4s, v18.8h, \mf0_1\().8h    // same for the high four lanes
-    umull       v22.4s, v19.4h, \mf2_3\().4h
-    umull2      v23.4s, v19.8h, \mf2_3\().8h
-    sshr        v16.8h, v16.8h, #15             // sign mask: 0 for non-negative lanes, -1 for negative lanes
-    sshr        v17.8h, v17.8h, #15
-    shrn        v18.4h, v20.4s, #16             // product >> 16, narrowed back to 16 bit
-    shrn2       v18.8h, v21.4s, #16
-    shrn        v19.4h, v22.4s, #16
-    shrn2       v19.8h, v23.4s, #16
-    eor         v18.16b, v18.16b, v16.16b       // eor followed by sub of the sign mask
-    eor         v19.16b, v19.16b, v17.16b       // negates the lanes whose input was negative
-    sub         v18.8h, v18.8h, v16.8h
-    sub         v19.8h, v19.8h, v17.8h
-    orr         \mask,  v18.16b, v19.16b        // nonzero iff any of the 16 results is nonzero
-    st1        {v18.8h,v19.8h}, [x0], #32       // store the results and advance x0 by 32 bytes
-.endm
-/* QUANT_END: return 1 in w0 if the 64-bit register d is nonzero, else 0.
- * Overwrites x2 and ends the function with ret. */
-.macro QUANT_END d
-    fmov        x2,  \d
-    mov         w0,  #0
-    tst         x2,  x2
-    cinc        w0,  w0,  ne        // w0 = 1 when x2 is nonzero
-    ret
-.endm
-/* In: x0 = dct (4 coefficients), w1 = mf, w2 = bias, following the argument
- * order of the prototype comment below. Quantizes the coefficients in place
- * and returns 1 in w0 if any result is nonzero, else 0. */
-// quant_2x2_dc( int16_t dct[4], int mf, int bias )
-function x264_quant_2x2_dc_neon, export=1
-    ld1        {v0.4h}, [x0]
-    dup         v2.4h,  w2              // bias in every lane
-    dup         v1.4h,  w1              // mf in every lane
-    abs         v3.4h,  v0.4h
-    add         v3.4h,  v3.4h,  v2.4h   // abs(coef) + bias
-    umull       v3.4s,  v3.4h,  v1.4h   // 32-bit products
-    sshr        v0.4h,  v0.4h,  #15     // sign mask: 0 or -1 per lane
-    shrn        v3.4h,  v3.4s,  #16     // product >> 16
-    eor         v3.8b,  v3.8b,  v0.8b   // eor + sub: negate lanes whose input was negative
-    sub         v3.4h,  v3.4h,  v0.4h
-    st1        {v3.4h}, [x0]            // write back over the input
-    QUANT_END   d3
-endfunc
-/* In: x0 = dct (16 coefficients), w1 = mf, w2 = bias. The same scalar mf and
- * bias are applied to every coefficient. Quantizes in place and returns 1 in
- * w0 if any result is nonzero, else 0. */
-// quant_4x4_dc( int16_t dct[16], int mf, int bias )
-function x264_quant_4x4_dc_neon, export=1
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h,  v16.8h
-    abs         v19.8h,  v17.8h
-    dup         v0.8h,  w2              // bias in every lane
-    dup         v2.8h,  w1              // mf in every lane
-    QUANT_TWO   v0.8h,  v0.8h,  v2,  v2,  v0.16b
-    uqxtn       v0.8b,  v0.8h           // saturating narrow: a nonzero halfword stays nonzero, mask now fits in d0
-    QUANT_END   d0
-endfunc
-/* In: x0 = dct (16 coefficients), x1 = mf table, x2 = bias table, one entry
- * per coefficient. Quantizes in place and returns 1 in w0 if any result is
- * nonzero, else 0. */
-// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4_neon, export=1
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h,  v16.8h
-    abs         v19.8h,  v17.8h
-    ld1        {v0.8h,v1.8h}, [x2]      // bias[0..15]
-    ld1        {v2.8h,v3.8h}, [x1]      // mf[0..15]
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v0.16b
-    uqxtn       v0.8b,  v0.8h           // saturating narrow: a nonzero halfword stays nonzero, mask now fits in d0
-    QUANT_END   d0
-endfunc
-/* In: x0 = dct (four consecutive blocks of 16 coefficients), x1 = mf table,
- * x2 = bias table. The same 16 mf/bias entries are reused for all four
- * blocks. Quantizes in place. Returns a 4-bit mask in w0 in which bit i is
- * set when block i produced at least one nonzero coefficient. */
-// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4x4_neon, export=1
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    ld1        {v0.8h,v1.8h}, [x2]      // bias, loaded once
-    ld1        {v2.8h,v3.8h}, [x1]      // mf, loaded once
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v4.16b    // block 0, mask in v4
-    ld1        {v16.8h,v17.8h}, [x0]    // x0 was advanced by the store inside QUANT_TWO
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v5.16b    // block 1, mask in v5
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v6.16b    // block 2, mask in v6
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v7.16b    // block 3, mask in v7
-    uqxtn       v4.8b,  v4.8h           // narrow each mask to 64 bits, keeping nonzero lanes nonzero
-    uqxtn       v7.8b,  v7.8h
-    uqxtn       v6.8b,  v6.8h
-    uqxtn       v5.8b,  v5.8h
-    fmov        x7,  d7
-    fmov        x6,  d6
-    fmov        x5,  d5
-    fmov        x4,  d4
-    mov         w0,  #0
-    tst         x7,  x7
-    cinc        w0,  w0,  ne            // block 3 flag, ends up in bit 3 after three shifts
-    lsl         w0,  w0,  #1
-    tst         x6,  x6
-    cinc        w0,  w0,  ne            // block 2 flag
-    lsl         w0,  w0,  #1
-    tst         x5,  x5
-    cinc        w0,  w0,  ne            // block 1 flag
-    lsl         w0,  w0,  #1
-    tst         x4,  x4
-    cinc        w0,  w0,  ne            // block 0 flag in bit 0
-    ret
-endfunc
-
-// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
-function x264_quant_8x8_neon, export=1
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    ld1        {v0.8h,v1.8h}, [x2], #32
-    ld1        {v2.8h,v3.8h}, [x1], #32
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v4.16b
-.rept 3
-    ld1        {v16.8h,v17.8h}, [x0]
-    abs         v18.8h, v16.8h
-    abs         v19.8h, v17.8h
-    ld1        {v0.8h,v1.8h}, [x2], #32
-    ld1        {v2.8h,v3.8h}, [x1], #32
-    QUANT_TWO   v0.8h,  v1.8h,  v2,  v3,  v5.16b
-    orr         v4.16b, v4.16b, v5.16b
-.endr
-    uqxtn       v0.8b,  v4.8h
-    QUANT_END   d0
-endfunc
-
-.macro DEQUANT_START mf_size offset dc=no
-    mov         w3,  #0x2b
-    mul         w3,  w3,  w2
-    lsr         w3,  w3,  #8            // i_qbits = i_qp / 6
-    add         w5,  w3,  w3,  lsl #1
-    sub         w2,  w2,  w5,  lsl #1   // i_mf = i_qp % 6
-    lsl         w2,  w2,  #\mf_size
-.ifc \dc,no
-    add         x1,  x1,  w2, sxtw      // dequant_mf[i_mf]
-.else
-    ldr         x1, [x1,  w2, sxtw]     // dequant_mf[i_mf][0][0]
-.endif
-    subs        w3,  w3,  #\offset      // 6 for 8x8
-.endm
-
-// dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-.macro DEQUANT size bits
-function x264_dequant_\size\()_neon, export=1
-    DEQUANT_START \bits+2, \bits
-.ifc \size, 8x8
-    mov         w2,  #4
-.endif
-    b.lt        dequant_\size\()_rshift
-
-    dup         v31.8h, w3
-dequant_\size\()_lshift_loop:
-.ifc \size, 8x8
-    subs        w2,  w2,  #1
-.endif
-    ld1        {v16.4s}, [x1], #16
-    ld1        {v17.4s}, [x1], #16
-    sqxtn       v2.4h,  v16.4s
-    ld1        {v18.4s}, [x1], #16
-    sqxtn2      v2.8h,  v17.4s
-    ld1        {v19.4s}, [x1], #16
-    sqxtn       v3.4h,  v18.4s
-    ld1        {v0.8h,v1.8h}, [x0]
-    sqxtn2      v3.8h,  v19.4s
-    mul         v0.8h,  v0.8h,  v2.8h
-    mul         v1.8h,  v1.8h,  v3.8h
-    sshl        v0.8h,  v0.8h,  v31.8h
-    sshl        v1.8h,  v1.8h,  v31.8h
-    st1        {v0.8h,v1.8h}, [x0], #32
-.ifc \size, 8x8
-    b.gt        dequant_\size\()_lshift_loop
-.endif
-    ret
-
-dequant_\size\()_rshift:
-    dup         v31.4s, w3
-    neg         w3,  w3
-    mov         w5,  #1
-    sub         w3,  w3,  #1
-    lsl         w5,  w5,  w3
-
-.ifc \size, 8x8
-dequant_\size\()_rshift_loop:
-    subs        w2,  w2,  #1
-.endif
-    ld1        {v16.4s}, [x1], #16
-    ld1        {v17.4s}, [x1], #16
-    sqxtn       v2.4h,  v16.4s
-    ld1        {v18.4s}, [x1], #16
-    dup         v16.4s, w5
-    sqxtn2      v2.8h,  v17.4s
-    ld1        {v19.4s}, [x1], #16
-    dup         v17.4s, w5
-    sqxtn       v3.4h,  v18.4s
-    ld1        {v0.8h,v1.8h}, [x0]
-    dup         v18.4s, w5
-    sqxtn2      v3.8h,  v19.4s
-    dup         v19.4s, w5
-
-    smlal       v16.4s, v0.4h,  v2.4h
-    smlal2      v17.4s, v0.8h,  v2.8h
-    smlal       v18.4s, v1.4h,  v3.4h
-    smlal2      v19.4s, v1.8h,  v3.8h
-    sshl        v16.4s, v16.4s, v31.4s
-    sshl        v17.4s, v17.4s, v31.4s
-    sshl        v18.4s, v18.4s, v31.4s
-    sshl        v19.4s, v19.4s, v31.4s
-
-    sqxtn       v0.4h,  v16.4s
-    sqxtn2      v0.8h,  v17.4s
-    sqxtn       v1.4h,  v18.4s
-    sqxtn2      v1.8h,  v19.4s
-    st1        {v0.8h,v1.8h}, [x0], #32
-.ifc \size, 8x8
-    b.gt        dequant_\size\()_rshift_loop
-.endif
-    ret
-endfunc
-.endm
-
-DEQUANT 4x4, 4
-DEQUANT 8x8, 6
-
-// dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-function x264_dequant_4x4_dc_neon, export=1
-    DEQUANT_START 6, 6, yes
-    b.lt        dequant_4x4_dc_rshift
-
-    lsl         w1,  w1,  w3
-    dup         v2.8h,  w1
-    ld1        {v0.8h,v1.8h},   [x0]
-
-    mul         v0.8h,  v0.8h,  v2.8h
-    mul         v1.8h,  v1.8h,  v2.8h
-    st1        {v0.8h,v1.8h},   [x0]
-    ret
-
-dequant_4x4_dc_rshift:
-    dup         v4.8h,  w1
-    dup         v3.4s, w3
-    neg         w3,  w3
-    mov         w5,  #1
-    sub         w3,  w3,  #1
-    lsl         w5,  w5,  w3
-
-    dup         v16.4s, w5
-    dup         v17.4s, w5
-    ld1        {v0.8h,v1.8h}, [x0]
-    dup         v18.4s, w5
-    dup         v19.4s, w5
-
-    smlal       v16.4s, v0.4h,  v4.4h
-    smlal2      v17.4s, v0.8h,  v4.8h
-    smlal       v18.4s, v1.4h,  v4.4h
-    smlal2      v19.4s, v1.8h,  v4.8h
-    sshl        v16.4s, v16.4s, v3.4s
-    sshl        v17.4s, v17.4s, v3.4s
-    sshl        v18.4s, v18.4s, v3.4s
-    sshl        v19.4s, v19.4s, v3.4s
-
-    sqxtn       v0.4h,  v16.4s
-    sqxtn2      v0.8h,  v17.4s
-    sqxtn       v1.4h,  v18.4s
-    sqxtn2      v1.8h,  v19.4s
-    st1        {v0.8h,v1.8h}, [x0]
-    ret
-endfunc
-
-.macro decimate_score_1x size
-function x264_decimate_score\size\()_neon, export=1
-    ld1        {v0.8h,v1.8h}, [x0]
-    movrel      x5,  X(x264_decimate_table4)
-    movi        v3.16b, #0x01
-    sqxtn       v0.8b,  v0.8h
-    sqxtn2      v0.16b, v1.8h
-    abs         v2.16b, v0.16b
-    cmeq        v1.16b, v0.16b, #0
-    cmhi        v2.16b, v2.16b, v3.16b
-    shrn        v1.8b,  v1.8h,  #4
-    shrn        v2.8b,  v2.8h,  #4
-    fmov        x2,  d2
-    fmov        x1,  d1
-    cbnz        x2,  9f
-    mvn         x1,  x1
-    mov         w0,  #0
-    cbz         x1,  0f
-.ifc \size, 15
-    lsr         x1,  x1,  #1
-.endif
-    rbit        x1,  x1
-1:
-    clz         x3,  x1
-    lsr         x6,  x3,  #2
-    lsl         x1,  x1,  x3
-    ldrb        w7,  [x5, x6]
-    lsl         x1,  x1,  #4
-    add         w0,  w0,  w7
-    cbnz        x1,  1b
-    ret
-9:
-    mov         w0,  #9
-0:
-    ret
-endfunc
-.endm
-
-decimate_score_1x 15
-decimate_score_1x 16
-
-const mask64, align=6
-    .byte  0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
-    .byte  0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
-endconst
-
-function x264_decimate_score64_neon, export=1
-    ld1        {v0.8h,v1.8h}, [x0], #32
-    ld1        {v2.8h,v3.8h}, [x0], #32
-    ld1        {v4.8h,v5.8h}, [x0], #32
-    ld1        {v6.8h,v7.8h}, [x0]
-    movrel      x6,  mask64
-    movi        v31.16b, #0x01
-    sqxtn       v16.8b,  v1.8h
-    sqxtn2      v16.16b, v0.8h
-    sqxtn       v17.8b,  v3.8h
-    sqxtn2      v17.16b, v2.8h
-    sqxtn       v18.8b,  v5.8h
-    sqxtn2      v18.16b, v4.8h
-    sqxtn       v19.8b,  v7.8h
-    sqxtn2      v19.16b, v6.8h
-    abs         v4.16b, v16.16b
-    abs         v5.16b, v17.16b
-    abs         v6.16b, v18.16b
-    abs         v7.16b, v19.16b
-    ld1        {v30.16b}, [x6]
-    cmeq        v0.16b, v16.16b, #0
-    cmeq        v1.16b, v17.16b, #0
-    cmeq        v2.16b, v18.16b, #0
-    cmeq        v3.16b, v19.16b, #0
-    umax        v4.16b, v4.16b, v5.16b
-    umax        v6.16b, v6.16b, v7.16b
-    and         v0.16b, v0.16b, v30.16b
-    and         v1.16b, v1.16b, v30.16b
-    and         v2.16b, v2.16b, v30.16b
-    and         v3.16b, v3.16b, v30.16b
-    umax        v4.16b, v4.16b, v6.16b
-    addp        v0.16b, v1.16b, v0.16b
-    addp        v2.16b, v3.16b, v2.16b
-    cmhi        v4.16b, v4.16b, v31.16b
-    addp        v0.16b, v2.16b, v0.16b
-    shrn        v4.8b,  v4.8h,  #4
-    addp        v0.16b, v0.16b, v0.16b
-    fmov        x2,  d4
-    fmov        x1,  d0
-    cbnz        x2,  9f
-    mvn         x1,  x1
-    mov         w0,  #0
-    cbz         x1,  0f
-    movrel      x5,  X(x264_decimate_table8)
-1:
-    clz         x3,  x1
-    lsl         x1,  x1,  x3
-    ldrb        w7,  [x5, x3]
-    lsl         x1,  x1,  #1
-    add         w0,  w0,  w7
-    cbnz        x1,  1b
-    ret
-9:
-    mov         w0,  #9
-0:
-    ret
-endfunc
-
-// int coeff_last( int16_t *l )
-function x264_coeff_last4_aarch64, export=1
-    ldr         x2,  [x0]
-    mov         w4,  #3
-    clz         x0,  x2
-    sub         w0,  w4,  w0, lsr #4
-    ret
-endfunc
-
-function x264_coeff_last8_aarch64, export=1
-    ldr         x3,  [x0, #8]
-    mov         w4,  #7
-    clz         x2,  x3
-    cmp         w2,  #64
-    b.ne        1f
-    ldr         x3,  [x0]
-    sub         w4,  w4,  #4
-    clz         x2,  x3
-1:
-    sub         w0,  w4,  w2, lsr #4
-    ret
-endfunc
-
-.macro COEFF_LAST_1x size
-function x264_coeff_last\size\()_neon, export=1
-.if \size == 15
-    sub         x0,  x0,  #2
-.endif
-    ld1        {v0.8h,v1.8h}, [x0]
-    uqxtn       v0.8b,  v0.8h
-    uqxtn2      v0.16b, v1.8h
-    cmtst       v0.16b, v0.16b, v0.16b
-    shrn        v0.8b,  v0.8h,  #4
-    fmov        x1,  d0
-    mov         w3,  #\size - 1
-    clz         x2,  x1
-    sub         w0,  w3,  w2, lsr #2
-    ret
-endfunc
-.endm
-
-COEFF_LAST_1x 15
-COEFF_LAST_1x 16
-
-function x264_coeff_last64_neon, export=1
-    ld1        {v0.8h,v1.8h,v2.8h,v3.8h}, [x0], 64
-    movi        v31.8h,  #8
-    movi        v30.8h,  #1
-    uqxtn       v0.8b,  v0.8h
-    uqxtn2      v0.16b, v1.8h
-    ld1        {v4.8h,v5.8h,v6.8h,v7.8h}, [x0], 64
-    uqxtn       v1.8b,  v2.8h
-    uqxtn2      v1.16b, v3.8h
-    uqxtn       v2.8b,  v4.8h
-    uqxtn2      v2.16b, v5.8h
-    uqxtn       v3.8b,  v6.8h
-    uqxtn2      v3.16b, v7.8h
-
-    cmtst       v0.16b, v0.16b, v0.16b
-    cmtst       v1.16b, v1.16b, v1.16b
-    cmtst       v2.16b, v2.16b, v2.16b
-    cmtst       v3.16b, v3.16b, v3.16b
-
-    shrn        v0.8b,  v0.8h,  #4
-    shrn2       v0.16b, v1.8h,  #4
-    shrn        v1.8b,  v2.8h,  #4
-    shrn2       v1.16b, v3.8h,  #4
-
-    clz         v0.4s,  v0.4s
-    clz         v1.4s,  v1.4s
-
-    shrn        v0.4h,  v0.4s,  #2
-    shrn2       v0.8h,  v1.4s,  #2
-
-    sub         v0.8h,  v31.8h,  v0.8h
-    sshl        v0.8h,  v30.8h,  v0.8h
-    shrn        v0.8b,  v0.8h,  #1
-
-    fmov        x2,  d0
-    mov         w3,  #63
-    clz         x2,  x2
-    sub         w0,  w3,  w2
-    ret
-endfunc
-
-.macro coeff_level_run_start size
-    add         x6,  x1,  #23            // runlevel->mask
-    mov         w7,  #0
-    mov         w8,  #0
-    mov         w9,  #1
-    and         x6,  x6,  #~15
-    mov         w4,  #\size - 1
-.endm
-
-.macro coeff_level_run shift
-    clz         x3,  x2
-    subs        w4,  w4,  w3, lsr #\shift
-    str         w4,  [x1], #4
-1:
-    ldrh        w5,  [x0, x4, lsl #1]
-    strh        w5,  [x6], #2
-    add         w7,  w7,  #1
-    lsl         w10, w9, w4
-    orr         w8,  w8,  w10
-    b.le        2f
-    add         w3,  w3,  #1 << \shift
-    sub         w4,  w4,  #1
-    and         x3,  x3,  #~((1 << \shift) - 1)
-    lsl         x2,  x2,  x3
-    clz         x3,  x2
-    subs        w4,  w4,  w3, lsr #\shift
-    b.ge        1b
-2:
-    str         w8,  [x1]
-    mov         w0,  w7
-.endm
-
-function x264_coeff_level_run4_aarch64, export=1
-    ldr         x2,  [x0]
-
-    coeff_level_run_start 4
-
-    coeff_level_run 4
-
-    ret
-endfunc
-
-.macro X264_COEFF_LEVEL_RUN size
-function x264_coeff_level_run\size\()_neon, export=1
-.if \size == 15
-    sub         x0,  x0,  #2
-.endif
-.if         \size < 15
-    ld1         {v0.8h}, [x0]
-    uqxtn       v0.8b,  v0.8h
-    cmtst       v0.8b,  v0.8b,  v0.8b
-.else
-    ld1         {v0.8h,v1.8h}, [x0]
-    uqxtn       v0.8b,  v0.8h
-    uqxtn2      v0.16b, v1.8h
-    cmtst       v0.16b, v0.16b, v0.16b
-    shrn        v0.8b,  v0.8h,  #4
-.endif
-    fmov        x2,  d0
-.if \size == 15
-    add         x0,  x0,  #2
-.endif
-
-    coeff_level_run_start \size
-
-    coeff_level_run (4 - (\size + 1) / 8)
-
-    ret
-endfunc
-.endm
-
-X264_COEFF_LEVEL_RUN 8
-X264_COEFF_LEVEL_RUN 15
-X264_COEFF_LEVEL_RUN 16
-
-function x264_denoise_dct_neon, export=1
-1:  subs        w3,  w3,  #16
-    ld1         {v0.8h,v1.8h}, [x0]
-    ld1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1]
-    abs         v16.8h,  v0.8h
-    abs         v17.8h,  v1.8h
-    ld1         {v2.8h,v3.8h}, [x2], #32
-    cmlt        v18.8h,  v0.8h,   #0
-    cmlt        v19.8h,  v1.8h,   #0
-    uaddw       v4.4s,   v4.4s,   v16.4h
-    uaddw2      v5.4s,   v5.4s,   v16.8h
-    uqsub       v20.8h,  v16.8h,  v2.8h
-    uqsub       v21.8h,  v17.8h,  v3.8h
-    uaddw       v6.4s,   v6.4s,   v17.4h
-    uaddw2      v7.4s,   v7.4s,   v17.8h
-    neg         v22.8h,  v20.8h
-    neg         v23.8h,  v21.8h
-    bsl         v18.16b, v22.16b, v20.16b
-    bsl         v19.16b, v23.16b, v21.16b
-    st1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1], #64
-    st1         {v18.8h,v19.8h}, [x0], #32
-    b.gt        1b
-    ret
-endfunc
--- a/library/src/main/cpp/libx264/common/aarch64/quant.h
+++ b/library/src/main/cpp/libx264/common/aarch64/quant.h
-/*****************************************************************************
- * quant.h: arm quantization and level-run
- *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_AARCH64_QUANT_H
-#define X264_AARCH64_QUANT_H
-
-int x264_quant_2x2_dc_aarch64( int16_t dct[4], int mf, int bias );
-
-int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias );
-int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias );
-int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
-int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
-int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
-
-void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
-void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
-void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
-
-int x264_decimate_score15_neon( int16_t * );
-int x264_decimate_score16_neon( int16_t * );
-int x264_decimate_score64_neon( int16_t * );
-
-int x264_coeff_last4_aarch64( int16_t * );
-int x264_coeff_last8_aarch64( int16_t * );
-int x264_coeff_last15_neon( int16_t * );
-int x264_coeff_last16_neon( int16_t * );
-int x264_coeff_last64_neon( int16_t * );
-
-int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * );
-int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * );
-int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * );
-int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * );
-
-void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
-
-#endif
--- a/library/src/main/cpp/libx264/common/arm/asm.S
+++ b/library/src/main/cpp/libx264/common/arm/asm.S
-/*****************************************************************************
- * asm.S: arm utility macros
- *****************************************************************************
- * Copyright (C) 2008-2017 x264 project
- *
- * Authors: Mans Rullgard <mans@mansr.com>
- *          David Conrad <lessen42@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "config.h"
-
-.syntax unified
-
-#ifndef __APPLE__
-.arch armv7-a
-.fpu neon
-#endif
-
-#ifdef PREFIX
-#   define EXTERN_ASM _
-#else
-#   define EXTERN_ASM
-#endif
-
-#ifdef __ELF__
-#   define ELF
-#else
-#   define ELF @
-#endif
-
-#ifdef __MACH__
-#   define MACH
-#   define NONMACH @
-#else
-#   define MACH @
-#   define NONMACH
-#endif
-
-#if HAVE_AS_FUNC
-#   define FUNC
-#else
-#   define FUNC @
-#endif
-
-#if SYS_LINUX
-#define HAVE_SECTION_DATA_REL_RO 1
-#else
-#define HAVE_SECTION_DATA_REL_RO 0
-#endif
-
-.macro require8, val=1
-ELF     .eabi_attribute 24, \val
-.endm
-
-.macro preserve8, val=1
-ELF     .eabi_attribute 25, \val
-.endm
-
-.macro function name, export=1
-    .macro endfunc
-ELF     .size   \name, . - \name
-FUNC    .endfunc
-        .purgem endfunc
-    .endm
-        .text
-        .align  2
-.if \export == 1
-        .global EXTERN_ASM\name
-ELF     .hidden EXTERN_ASM\name
-ELF     .type   EXTERN_ASM\name, %function
-FUNC    .func   EXTERN_ASM\name
-EXTERN_ASM\name:
-.else
-ELF     .hidden \name
-ELF     .type   \name, %function
-FUNC    .func   \name
-\name:
-.endif
-.endm
-
-.macro const name, align=2, relocate=0
-    .macro endconst
-ELF     .size   \name, . - \name
-        .purgem endconst
-    .endm
-.if HAVE_SECTION_DATA_REL_RO && \relocate
-        .section        .data.rel.ro
-.else
-NONMACH .section        .rodata
-MACH    .const_data
-.endif
-        .align          \align
-\name:
-.endm
-
-.macro movrel rd, val
-#if defined(PIC)
-        ldr             \rd,  1f
-        b               2f
-1:
-@ FIXME: thumb
-        .word           \val - (2f + 8)
-2:
-        add             \rd,  \rd,  pc
-#elif HAVE_ARMV6T2
-        movw            \rd, #:lower16:\val
-        movt            \rd, #:upper16:\val
-#else
-        ldr             \rd, =\val
-#endif
-.endm
-
-.macro movrelx rd, val, got
-#if defined(PIC) && defined(__ELF__)
-        ldr             \got, 2f
-        ldr             \rd,  1f
-        b               3f
-1:
-@ FIXME: thumb
-        .word \val(GOT)
-2:
-        .word _GLOBAL_OFFSET_TABLE_ - (3f + 8)
-3:
-        add             \got, \got, pc
-        ldr             \rd, [\got, \rd]
-#elif defined(PIC) && defined(__APPLE__)
-        ldr             \rd,  1f
-        b               2f
-1:
-@ FIXME: thumb
-        .word           3f - (2f + 8)
-2:
-        ldr             \rd, [pc, \rd]
-        .non_lazy_symbol_pointer
-3:
-        .indirect_symbol \val
-        .word           0
-        .text
-#else
-        movrel          \rd, \val
-#endif
-.endm
-
-.macro movconst rd, val
-#if HAVE_ARMV6T2
-    movw        \rd, #:lower16:\val
-.if \val >> 16
-    movt        \rd, #:upper16:\val
-.endif
-#else
-    ldr         \rd, =\val
-#endif
-.endm
-
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
-#define FENC_STRIDE 16
-#define FDEC_STRIDE 32
-
-.macro HORIZ_ADD dest, a, b
-.ifnb \b
-    vadd.u16    \a, \a, \b
-.endif
-    vpaddl.u16  \a, \a
-    vpaddl.u32  \dest, \a
-.endm
-
-.macro SUMSUB_AB sum, diff, a, b
-    vadd.s16    \sum,  \a, \b
-    vsub.s16    \diff, \a, \b
-.endm
-
-.macro SUMSUB_ABCD s1, d1, s2, d2, a, b, c, d
-    SUMSUB_AB   \s1, \d1, \a, \b
-    SUMSUB_AB   \s2, \d2, \c, \d
-.endm
-
-.macro ABS2 a b
-    vabs.s16 \a, \a
-    vabs.s16 \b, \b
-.endm
-
-// dist = distance in elements (0 for vertical pass, 1/2 for horizontal passes)
-// op = sumsub/amax (sum and diff / maximum of absolutes)
-// d1/2 = destination registers
-// s1/2 = source registers
-.macro HADAMARD dist, op, d1, d2, s1, s2
-.if \dist == 1
-    vtrn.16     \s1, \s2
-.else
-    vtrn.32     \s1, \s2
-.endif
-.ifc \op, sumsub
-    SUMSUB_AB   \d1, \d2, \s1, \s2
-.else
-    vabs.s16    \s1, \s1
-    vabs.s16    \s2, \s2
-    vmax.s16    \d1, \s1, \s2
-.endif
-.endm
-
-.macro TRANSPOSE8x8 r0 r1 r2 r3 r4 r5 r6 r7
-    vtrn.32         \r0, \r4
-    vtrn.32         \r1, \r5
-    vtrn.32         \r2, \r6
-    vtrn.32         \r3, \r7
-    vtrn.16         \r0, \r2
-    vtrn.16         \r1, \r3
-    vtrn.16         \r4, \r6
-    vtrn.16         \r5, \r7
-    vtrn.8          \r0, \r1
-    vtrn.8          \r2, \r3
-    vtrn.8          \r4, \r5
-    vtrn.8          \r6, \r7
-.endm
-
-.macro TRANSPOSE4x4 r0 r1 r2 r3
-    vtrn.16         \r0, \r2
-    vtrn.16         \r1, \r3
-    vtrn.8          \r0, \r1
-    vtrn.8          \r2, \r3
-.endm
-
-.macro TRANSPOSE4x4_16  d0 d1 d2 d3
-    vtrn.32     \d0, \d2
-    vtrn.32     \d1, \d3
-    vtrn.16     \d0, \d1
-    vtrn.16     \d2, \d3
-.endm
--- a/library/src/main/cpp/libx264/common/arm/bitstream-a.S
+++ b/library/src/main/cpp/libx264/common/arm/bitstream-a.S
-/*****************************************************************************
- * bitstream-a.S: arm bitstream functions
- *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
- *
- * Authors: Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "asm.S"
-
-function x264_nal_escape_neon
-    push        {r4-r5,lr}
-    vmov.u8     q0,  #0xff
-    vmov.u8     q8,  #4
-    mov         r3,  #3
-    subs        lr,  r1,  r2
-    beq         99f
-0:
-    cmn         lr,  #15
-    blt         16f
-    mov         r1,  r2
-    b           100f
-16:
-    vld1.8      {q1}, [r1]!
-    vext.8      q2,  q0,  q1, #14
-    vext.8      q3,  q0,  q1, #15
-    vcgt.u8     q11, q8,  q1
-    vceq.u8     q9,  q2,  #0
-    vceq.u8     q10, q3,  #0
-    vand        q9,  q9,  q11
-    vand        q9,  q9,  q10
-    vshrn.u16   d22, q9,  #4
-    vmov        ip,  lr,  d22
-    orrs        ip,  ip,  lr
-    beq         16f
-    mov         lr,  #-16
-100:
-    vmov.u8     r5,  d1[6]
-    vmov.u8     r4,  d1[7]
-    orr         r5,  r4,  r5, lsl #8
-101:
-    ldrb        r4,  [r1, lr]
-    orr         ip,  r4,  r5, lsl #16
-    cmp         ip,  #3
-    bhi         102f
-    strb        r3,  [r0], #1
-    orr         r5,  r3,  r5, lsl #8
-102:
-    adds        lr,  lr,  #1
-    strb        r4,  [r0], #1
-    orr         r5,  r4,  r5, lsl #8
-    blt         101b
-    subs        lr,  r1,  r2
-    lsr         ip,  r5,  #8
-    vmov.u8     d1[6],  ip
-    vmov.u8     d1[7],  r5
-    blt         0b
-
-    pop         {r4-r5,pc}
-16:
-    subs        lr,  r1,  r2
-    vst1.8      {q1}, [r0]!
-    vmov        q0, q1
-    blt         0b
-99:
-    pop         {r4-r5,pc}
-endfunc
--- a/library/src/main/cpp/libx264/common/arm/cpu-a.S
+++ b/library/src/main/cpp/libx264/common/arm/cpu-a.S
-/*****************************************************************************
- * cpu-a.S: arm cpu detection
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "asm.S"
-
-.align 2
-
-// done in gas because .fpu neon overrides the refusal to assemble
-// instructions the selected -march/-mcpu doesn't support
-function x264_cpu_neon_test
-    vadd.i16    q0, q0, q0
-    bx          lr
-endfunc
-
-// return: 0 on success
-//         1 if counters were already enabled
-//         9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter, export=0
-    mrc         p15, 0, r2, c9, c12, 0      // read PMNC
-    ands        r0, r2, #1
-    andne       r0, r2, #9
-
-    orr         r2, r2, #1                  // enable counters
-    bic         r2, r2, #8                  // full resolution
-    mcreq       p15, 0, r2, c9, c12, 0      // write PMNC
-    mov         r2, #1 << 31                // enable cycle counter
-    mcr         p15, 0, r2, c9, c12, 1      // write CNTENS
-    bx          lr
-endfunc
-
-function x264_cpu_disable_armv7_counter, export=0
-    mrc         p15, 0, r0, c9, c12, 0      // read PMNC
-    bic         r0, r0, #1                  // disable counters
-    mcr         p15, 0, r0, c9, c12, 0      // write PMNC
-    bx          lr
-endfunc
-
-
-.macro READ_TIME r
-    mrc         p15, 0, \r, c9, c13, 0
-.endm
-
-// return: 0 if transfers neon -> arm transfers take more than 10 cycles
-//         nonzero otherwise
-function x264_cpu_fast_neon_mrc_test
-    // check for user access to performance counters
-    mrc         p15, 0, r0, c9, c14, 0
-    cmp         r0, #0
-    bxeq        lr
-
-    push        {r4-r6,lr}
-    bl          x264_cpu_enable_armv7_counter
-    ands        r1, r0, #8
-    mov         r3, #0
-    mov         ip, #4
-    mov         r6, #4
-    moveq       r5, #1
-    movne       r5, #64
-
-average_loop:
-    mov         r4, r5
-    READ_TIME   r1
-1:  subs        r4, r4, #1
-.rept 8
-    vmov.u32    lr, d0[0]
-    add         lr, lr, lr
-.endr
-    bgt         1b
-    READ_TIME   r2
-
-    subs        r6, r6, #1
-    sub         r2, r2, r1
-    cmpgt       r2, #30 << 3    // assume context switch if it took over 30 cycles
-    addle       r3, r3, r2
-    subsle      ip, ip, #1
-    bgt         average_loop
-
-    // disable counters if we enabled them
-    ands        r0, r0, #1
-    bleq        x264_cpu_disable_armv7_counter
-
-    lsr         r0, r3, #5
-    cmp         r0, #10
-    movgt       r0, #0
-    pop         {r4-r6,pc}
-endfunc
--- a/library/src/main/cpp/libx264/common/arm/dct-a.S
+++ b/library/src/main/cpp/libx264/common/arm/dct-a.S
--- a/library/src/main/cpp/libx264/common/arm/dct.h
+++ b/library/src/main/cpp/libx264/common/arm/dct.h
-/*****************************************************************************
- * dct.h: arm transform and zigzag
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_ARM_DCT_H
-#define X264_ARM_DCT_H
-
-void x264_dct4x4dc_neon( int16_t d[16] );
-void x264_idct4x4dc_neon( int16_t d[16] );
-
-void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
-
-void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] );
-void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
-void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
-
-void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] );
-void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] );
-void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 );
-
-void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
-
-void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] );
-void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] );
-
-void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] );
-
-#endif
--- a/library/src/main/cpp/libx264/common/arm/deblock-a.S
+++ b/library/src/main/cpp/libx264/common/arm/deblock-a.S
--- a/library/src/main/cpp/libx264/common/arm/mc-a.S
+++ b/library/src/main/cpp/libx264/common/arm/mc-a.S
--- a/library/src/main/cpp/libx264/common/arm/mc-c.c
+++ b/library/src/main/cpp/libx264/common/arm/mc-c.c
-/*****************************************************************************
- * mc-c.c: arm motion compensation
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *          Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include "common/common.h"
-#include "mc.h"
-
-void x264_prefetch_ref_arm( uint8_t *, intptr_t, int );
-void x264_prefetch_fenc_arm( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-
-void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
-void x264_memzero_aligned_neon( void *dst, size_t n );
-
-void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_8x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-
-void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
-
-void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
-                                pixel *src, intptr_t i_src, int w, int h );
-void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
-                                         pixel *dstv, intptr_t i_dstv,
-                                         pixel *src,  intptr_t i_src, int w, int h );
-void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
-                                            pixel *dstb, intptr_t i_dstb,
-                                            pixel *dstc, intptr_t i_dstc,
-                                            pixel *src,  intptr_t i_src, int pw, int w, int h );
-void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
-                                           pixel *srcu, intptr_t i_srcu,
-                                           pixel *srcv, intptr_t i_srcv, int w, int h );
-void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
-                                     pixel *src, intptr_t i_src, int w, int h );
-
-void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
-void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
-void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
-
-#if !HIGH_BIT_DEPTH
-#define MC_WEIGHT(func)\
-void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
-\
-static weight_fn_t x264_mc##func##_wtab_neon[6] =\
-{\
-    x264_mc_weight_w4##func##_neon,\
-    x264_mc_weight_w4##func##_neon,\
-    x264_mc_weight_w8##func##_neon,\
-    x264_mc_weight_w16##func##_neon,\
-    x264_mc_weight_w16##func##_neon,\
-    x264_mc_weight_w20##func##_neon,\
-};
-
-MC_WEIGHT()
-MC_WEIGHT(_nodenom)
-MC_WEIGHT(_offsetadd)
-MC_WEIGHT(_offsetsub)
-#endif
-
-void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-void x264_mc_copy_w16_aligned_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
-
-void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
-
-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, intptr_t, int );
-void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
-void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
-
-void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
-void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
-void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
-void x264_integral_init8v_neon( uint16_t *, intptr_t );
-
-void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
-
-void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
-void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
-
-#if !HIGH_BIT_DEPTH
-static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
-{
-    if( w->i_scale == 1<<w->i_denom )
-    {
-        if( w->i_offset < 0 )
-        {
-            w->weightfn = x264_mc_offsetsub_wtab_neon;
-            w->cachea[0] = -w->i_offset;
-        }
-        else
-        {
-            w->weightfn = x264_mc_offsetadd_wtab_neon;
-            w->cachea[0] = w->i_offset;
-        }
-    }
-    else if( !w->i_denom )
-        w->weightfn = x264_mc_nodenom_wtab_neon;
-    else
-        w->weightfn = x264_mc_wtab_neon;
-}
-
-static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
-{
-    NULL,
-    x264_pixel_avg2_w4_neon,
-    x264_pixel_avg2_w8_neon,
-    x264_pixel_avg2_w16_neon,   // no slower than w12, so no point in a separate function
-    x264_pixel_avg2_w16_neon,
-    x264_pixel_avg2_w20_neon,
-};
-
-static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
-{
-    NULL,
-    x264_mc_copy_w4_neon,
-    x264_mc_copy_w8_neon,
-    NULL,
-    x264_mc_copy_w16_neon,
-};
-
-static void mc_luma_neon( uint8_t *dst,    intptr_t i_dst_stride,
-                          uint8_t *src[4], intptr_t i_src_stride,
-                          int mvx, int mvy,
-                          int i_width, int i_height, const x264_weight_t *weight )
-{
-    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
-    if( (mvy&3) == 3 )             // explict if() to force conditional add
-        src1 += i_src_stride;
-
-    if( qpel_idx & 5 ) /* qpel interpolation needed */
-    {
-        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
-                dst, i_dst_stride, src1, i_src_stride,
-                src2, i_height );
-        if( weight->weightfn )
-            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
-    }
-    else if( weight->weightfn )
-        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
-    else
-        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
-}
-
-static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
-                              uint8_t *src[4], intptr_t i_src_stride,
-                              int mvx, int mvy,
-                              int i_width, int i_height, const x264_weight_t *weight )
-{
-    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
-    if( (mvy&3) == 3 )             // explict if() to force conditional add
-        src1 += i_src_stride;
-
-    if( qpel_idx & 5 ) /* qpel interpolation needed */
-    {
-        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
-                dst, *i_dst_stride, src1, i_src_stride,
-                src2, i_height );
-        if( weight->weightfn )
-            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
-        return dst;
-    }
-    else if( weight->weightfn )
-    {
-        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
-        return dst;
-    }
-    else
-    {
-        *i_dst_stride = i_src_stride;
-        return src1;
-    }
-}
-
-static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
-                              intptr_t stride, int width, int height, int16_t *buf )
-{
-    intptr_t realign = (intptr_t)src & 15;
-    src -= realign;
-    dstv -= realign;
-    dstc -= realign;
-    dsth -= realign;
-    width += realign;
-    while( height-- )
-    {
-        x264_hpel_filter_v_neon( dstv, src, buf+8, stride, width );
-        x264_hpel_filter_c_neon( dstc, buf+8, width );
-        x264_hpel_filter_h_neon( dsth, src, width );
-        dsth += stride;
-        dstv += stride;
-        dstc += stride;
-        src  += stride;
-    }
-}
-
-PLANE_COPY(16, neon)
-PLANE_COPY_SWAP(16, neon)
-PLANE_INTERLEAVE(neon)
-#endif // !HIGH_BIT_DEPTH
-
-PROPAGATE_LIST(neon)
-
-void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
-{
-    if( !(cpu&X264_CPU_ARMV6) )
-        return;
-
-#if !HIGH_BIT_DEPTH
-    pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
-    pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
-    pf->prefetch_ref  = x264_prefetch_ref_arm;
-#endif // !HIGH_BIT_DEPTH
-
-    if( !(cpu&X264_CPU_NEON) )
-        return;
-
-#if !HIGH_BIT_DEPTH
-    pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
-    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
-    pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
-    pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
-
-    pf->plane_copy              = x264_plane_copy_neon;
-    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
-    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
-    pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
-    pf->plane_copy_swap = x264_plane_copy_swap_neon;
-
-    pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
-    pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
-    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
-
-    pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
-    pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
-    pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;
-    pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_neon;
-    pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_neon;
-    pf->avg[PIXEL_4x16]  = x264_pixel_avg_4x16_neon;
-    pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_neon;
-    pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
-    pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
-
-    pf->weight    = x264_mc_wtab_neon;
-    pf->offsetadd = x264_mc_offsetadd_wtab_neon;
-    pf->offsetsub = x264_mc_offsetsub_wtab_neon;
-    pf->weight_cache = x264_weight_cache_neon;
-
-    pf->mc_chroma = x264_mc_chroma_neon;
-    pf->mc_luma = mc_luma_neon;
-    pf->get_ref = get_ref_neon;
-    pf->hpel_filter = hpel_filter_neon;
-    pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
-
-    pf->integral_init4h = x264_integral_init4h_neon;
-    pf->integral_init8h = x264_integral_init8h_neon;
-    pf->integral_init4v = x264_integral_init4v_neon;
-    pf->integral_init8v = x264_integral_init8v_neon;
-
-    pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon;
-    pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_neon;
-    pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_neon;
-#endif // !HIGH_BIT_DEPTH
-
-// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
-#ifndef SYS_MACOSX
-    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
-#endif
-    pf->memzero_aligned = x264_memzero_aligned_neon;
-}
--- a/library/src/main/cpp/libx264/common/arm/mc.h
+++ b/library/src/main/cpp/libx264/common/arm/mc.h
-/*****************************************************************************
- * mc.h: arm motion compensation
- *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
- *
- * Authors: David Conrad <lessen42@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_ARM_MC_H
-#define X264_ARM_MC_H
-
-void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf );
-
-#endif