225 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			225 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
 | |
| index 00f93bf59f..52da7036f3 100644
 | |
| --- a/libavcodec/aarch64/Makefile
 | |
| +++ b/libavcodec/aarch64/Makefile
 | |
| @@ -6,6 +6,7 @@ OBJS-$(CONFIG_H264DSP)                  += aarch64/h264dsp_init_aarch64.o
 | |
|  OBJS-$(CONFIG_H264PRED)                 += aarch64/h264pred_init.o
 | |
|  OBJS-$(CONFIG_H264QPEL)                 += aarch64/h264qpel_init_aarch64.o
 | |
|  OBJS-$(CONFIG_HPELDSP)                  += aarch64/hpeldsp_init_aarch64.o
 | |
| +OBJS-$(CONFIG_IDCTDSP)                  += aarch64/idctdsp_init_aarch64.o
 | |
|  OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
 | |
|  OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
 | |
|  OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o
 | |
| @@ -21,6 +22,7 @@ OBJS-$(CONFIG_VC1DSP)                   += aarch64/vc1dsp_init_aarch64.o
 | |
|  OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
 | |
|  OBJS-$(CONFIG_VP9_DECODER)              += aarch64/vp9dsp_init_10bpp_aarch64.o \
 | |
|                                             aarch64/vp9dsp_init_12bpp_aarch64.o \
 | |
| +                                           aarch64/vp9mc_aarch64.o             \
 | |
|                                             aarch64/vp9dsp_init_aarch64.o
 | |
|  
 | |
|  # ARMv8 optimizations
 | |
| @@ -41,8 +43,7 @@ NEON-OBJS-$(CONFIG_H264PRED)            += aarch64/h264pred_neon.o
 | |
|  NEON-OBJS-$(CONFIG_H264QPEL)            += aarch64/h264qpel_neon.o             \
 | |
|                                             aarch64/hpeldsp_neon.o
 | |
|  NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
 | |
| -NEON-OBJS-$(CONFIG_IDCTDSP)             += aarch64/idctdsp_init_aarch64.o      \
 | |
| -                                           aarch64/simple_idct_neon.o
 | |
| +NEON-OBJS-$(CONFIG_IDCTDSP)             += aarch64/simple_idct_neon.o
 | |
|  NEON-OBJS-$(CONFIG_MDCT)                += aarch64/mdct_neon.o
 | |
|  NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
 | |
|  NEON-OBJS-$(CONFIG_VP8DSP)              += aarch64/vp8dsp_neon.o
 | |
| diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c
 | |
| index 0406e60830..742a3372e3 100644
 | |
| --- a/libavcodec/aarch64/idctdsp_init_aarch64.c
 | |
| +++ b/libavcodec/aarch64/idctdsp_init_aarch64.c
 | |
| @@ -21,6 +21,8 @@
 | |
|   */
 | |
|  
 | |
|  #include "libavutil/attributes.h"
 | |
| +#include "libavutil/cpu.h"
 | |
| +#include "libavutil/arm/cpu.h"
 | |
|  #include "libavcodec/avcodec.h"
 | |
|  #include "libavcodec/idctdsp.h"
 | |
|  #include "idct.h"
 | |
| @@ -28,7 +30,9 @@
 | |
|  av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
 | |
|                                       unsigned high_bit_depth)
 | |
|  {
 | |
| -    if (!avctx->lowres && !high_bit_depth) {
 | |
| +    int cpu_flags = av_get_cpu_flags();
 | |
| +
 | |
| +    if (have_neon(cpu_flags) && !avctx->lowres && !high_bit_depth) {
 | |
|          if (avctx->idct_algo == FF_IDCT_AUTO ||
 | |
|              avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
 | |
|              avctx->idct_algo == FF_IDCT_SIMPLENEON) {
 | |
| diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
 | |
| index cac6428709..53b372c262 100644
 | |
| --- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
 | |
| +++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
 | |
| @@ -25,31 +25,6 @@
 | |
|  //                            const uint8_t *ref, ptrdiff_t ref_stride,
 | |
|  //                            int h, int mx, int my);
 | |
|  
 | |
| -function ff_vp9_copy128_aarch64, export=1
 | |
| -1:
 | |
| -        ldp             x5,  x6,  [x2]
 | |
| -        ldp             x7,  x8,  [x2, #16]
 | |
| -        stp             x5,  x6,  [x0]
 | |
| -        ldp             x9,  x10, [x2, #32]
 | |
| -        stp             x7,  x8,  [x0, #16]
 | |
| -        subs            w4,  w4,  #1
 | |
| -        ldp             x11, x12, [x2, #48]
 | |
| -        stp             x9,  x10, [x0, #32]
 | |
| -        stp             x11, x12, [x0, #48]
 | |
| -        ldp             x5,  x6,  [x2, #64]
 | |
| -        ldp             x7,  x8,  [x2, #80]
 | |
| -        stp             x5,  x6,  [x0, #64]
 | |
| -        ldp             x9,  x10, [x2, #96]
 | |
| -        stp             x7,  x8,  [x0, #80]
 | |
| -        ldp             x11, x12, [x2, #112]
 | |
| -        stp             x9,  x10, [x0, #96]
 | |
| -        stp             x11, x12, [x0, #112]
 | |
| -        add             x2,  x2,  x3
 | |
| -        add             x0,  x0,  x1
 | |
| -        b.ne            1b
 | |
| -        ret
 | |
| -endfunc
 | |
| -
 | |
|  function ff_vp9_avg64_16_neon, export=1
 | |
|          mov             x5,  x0
 | |
|          sub             x1,  x1,  #64
 | |
| diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
 | |
| new file mode 100644
 | |
| index 0000000000..f17a8cf04a
 | |
| --- /dev/null
 | |
| +++ b/libavcodec/aarch64/vp9mc_aarch64.S
 | |
| @@ -0,0 +1,81 @@
 | |
| +/*
 | |
| + * Copyright (c) 2016 Google Inc.
 | |
| + *
 | |
| + * This file is part of FFmpeg.
 | |
| + *
 | |
| + * FFmpeg is free software; you can redistribute it and/or
 | |
| + * modify it under the terms of the GNU Lesser General Public
 | |
| + * License as published by the Free Software Foundation; either
 | |
| + * version 2.1 of the License, or (at your option) any later version.
 | |
| + *
 | |
| + * FFmpeg is distributed in the hope that it will be useful,
 | |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| + * Lesser General Public License for more details.
 | |
| + *
 | |
| + * You should have received a copy of the GNU Lesser General Public
 | |
| + * License along with FFmpeg; if not, write to the Free Software
 | |
| + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
| + */
 | |
| +
 | |
| +#include "libavutil/aarch64/asm.S"
 | |
| +
 | |
| +// All public functions in this file have the following signature:
 | |
| +// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
 | |
| +//                            const uint8_t *ref, ptrdiff_t ref_stride,
 | |
| +//                            int h, int mx, int my);
 | |
| +
 | |
| +function ff_vp9_copy128_aarch64, export=1
 | |
| +1:
 | |
| +        ldp             x5,  x6,  [x2]
 | |
| +        ldp             x7,  x8,  [x2, #16]
 | |
| +        stp             x5,  x6,  [x0]
 | |
| +        ldp             x9,  x10, [x2, #32]
 | |
| +        stp             x7,  x8,  [x0, #16]
 | |
| +        subs            w4,  w4,  #1
 | |
| +        ldp             x11, x12, [x2, #48]
 | |
| +        stp             x9,  x10, [x0, #32]
 | |
| +        stp             x11, x12, [x0, #48]
 | |
| +        ldp             x5,  x6,  [x2, #64]
 | |
| +        ldp             x7,  x8,  [x2, #80]
 | |
| +        stp             x5,  x6,  [x0, #64]
 | |
| +        ldp             x9,  x10, [x2, #96]
 | |
| +        stp             x7,  x8,  [x0, #80]
 | |
| +        ldp             x11, x12, [x2, #112]
 | |
| +        stp             x9,  x10, [x0, #96]
 | |
| +        stp             x11, x12, [x0, #112]
 | |
| +        add             x2,  x2,  x3
 | |
| +        add             x0,  x0,  x1
 | |
| +        b.ne            1b
 | |
| +        ret
 | |
| +endfunc
 | |
| +
 | |
| +function ff_vp9_copy64_aarch64, export=1
 | |
| +1:
 | |
| +        ldp             x5,  x6,  [x2]
 | |
| +        ldp             x7,  x8,  [x2, #16]
 | |
| +        stp             x5,  x6,  [x0]
 | |
| +        ldp             x9,  x10, [x2, #32]
 | |
| +        stp             x7,  x8,  [x0, #16]
 | |
| +        subs            w4,  w4,  #1
 | |
| +        ldp             x11, x12, [x2, #48]
 | |
| +        stp             x9,  x10, [x0, #32]
 | |
| +        stp             x11, x12, [x0, #48]
 | |
| +        add             x2,  x2,  x3
 | |
| +        add             x0,  x0,  x1
 | |
| +        b.ne            1b
 | |
| +        ret
 | |
| +endfunc
 | |
| +
 | |
| +function ff_vp9_copy32_aarch64, export=1
 | |
| +1:
 | |
| +        ldp             x5,  x6,  [x2]
 | |
| +        ldp             x7,  x8,  [x2, #16]
 | |
| +        stp             x5,  x6,  [x0]
 | |
| +        subs            w4,  w4,  #1
 | |
| +        stp             x7,  x8,  [x0, #16]
 | |
| +        add             x2,  x2,  x3
 | |
| +        add             x0,  x0,  x1
 | |
| +        b.ne            1b
 | |
| +        ret
 | |
| +endfunc
 | |
| diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
 | |
| index f67624ca04..abf2bae9db 100644
 | |
| --- a/libavcodec/aarch64/vp9mc_neon.S
 | |
| +++ b/libavcodec/aarch64/vp9mc_neon.S
 | |
| @@ -25,23 +25,6 @@
 | |
|  //                            const uint8_t *ref, ptrdiff_t ref_stride,
 | |
|  //                            int h, int mx, int my);
 | |
|  
 | |
| -function ff_vp9_copy64_aarch64, export=1
 | |
| -1:
 | |
| -        ldp             x5,  x6,  [x2]
 | |
| -        ldp             x7,  x8,  [x2, #16]
 | |
| -        stp             x5,  x6,  [x0]
 | |
| -        ldp             x9,  x10, [x2, #32]
 | |
| -        stp             x7,  x8,  [x0, #16]
 | |
| -        subs            w4,  w4,  #1
 | |
| -        ldp             x11, x12, [x2, #48]
 | |
| -        stp             x9,  x10, [x0, #32]
 | |
| -        stp             x11, x12, [x0, #48]
 | |
| -        add             x2,  x2,  x3
 | |
| -        add             x0,  x0,  x1
 | |
| -        b.ne            1b
 | |
| -        ret
 | |
| -endfunc
 | |
| -
 | |
|  function ff_vp9_avg64_neon, export=1
 | |
|          mov             x5,  x0
 | |
|  1:
 | |
| @@ -64,19 +47,6 @@ function ff_vp9_avg64_neon, export=1
 | |
|          ret
 | |
|  endfunc
 | |
|  
 | |
| -function ff_vp9_copy32_aarch64, export=1
 | |
| -1:
 | |
| -        ldp             x5,  x6,  [x2]
 | |
| -        ldp             x7,  x8,  [x2, #16]
 | |
| -        stp             x5,  x6,  [x0]
 | |
| -        subs            w4,  w4,  #1
 | |
| -        stp             x7,  x8,  [x0, #16]
 | |
| -        add             x2,  x2,  x3
 | |
| -        add             x0,  x0,  x1
 | |
| -        b.ne            1b
 | |
| -        ret
 | |
| -endfunc
 | |
| -
 | |
|  function ff_vp9_avg32_neon, export=1
 | |
|  1:
 | |
|          ld1             {v2.16b, v3.16b},  [x2], x3
 | 
