
.syntax unified
@********************************************************************
@* *
@* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
@* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
@* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
@* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
@* *
@* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
@* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
@* *
@********************************************************************
@ Original implementation:
@ Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
@ last mod: $Id$
@********************************************************************
.text; .p2align 2
.include "armopts-gnu.S"
@ Vanilla ARM v4 versions
.global oc_frag_copy_list_arm
.global oc_frag_recon_intra_arm
.global oc_frag_recon_inter_arm
.global oc_frag_recon_inter2_arm
.type oc_frag_copy_list_arm, %function; oc_frag_copy_list_arm: @ PROC
@ r0 = _dst_frame
@ r1 = _src_frame
@ r2 = _ystride
@ r3 = _fragis
@ <> = _nfragis
@ <> = _frag_buf_offs
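@ Reference sketch of the operation (not the library's C source): for
@ each index in _fragis[0.._nfragis), copy one 8x8-byte fragment from
@ _src_frame to _dst_frame at byte offset _frag_buf_offs[_fragis[i]],
@ with successive rows _ystride bytes apart:
@   for(i=0;i<_nfragis;i++){
@     off=_frag_buf_offs[_fragis[i]];
@     for(y=0;y<8;y++)
@       memcpy(_dst_frame+off+y*_ystride,_src_frame+off+y*_ystride,8);
@   }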
LDR r12,[r13] @ r12 = _nfragis
STMFD r13!,{r4-r6,r11,r14}
SUBS r12, r12, #1
LDR r4,[r3],#4 @ r4 = _fragis[fragii]
LDRGE r14,[r13,#4*6] @ r14 = _frag_buf_offs
BLT ofcl_arm_end
SUB r2, r2, #4
ofcl_arm_lp:
LDR r11,[r14,r4,LSL #2] @ r11 = _frag_buf_offs[_fragis[fragii]]
SUBS r12, r12, #1
@ Stall (on XScale)
ADD r4, r1, r11 @ r4 = _src_frame+frag_buf_off
LDR r6, [r4], #4
ADD r11,r0, r11 @ r11 = _dst_frame+frag_buf_off
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4], r2
STR r6, [r11],#4
LDR r6, [r4], #4
STR r5, [r11],r2
LDR r5, [r4]
LDRGE r4,[r3],#4 @ r4 = _fragis[fragii]
STR r6, [r11],#4
STR r5, [r11]
BGE ofcl_arm_lp
ofcl_arm_end:
LDMFD r13!,{r4-r6,r11,PC}
.size oc_frag_copy_list_arm, .-oc_frag_copy_list_arm @ ENDP
.type oc_frag_recon_intra_arm, %function; oc_frag_recon_intra_arm: @ PROC
@ r0 = unsigned char *_dst
@ r1 = int _ystride
@ r2 = const ogg_int16_t _residue[64]
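@ Reference sketch of the operation (not the library's C source):
@   for(i=0;i<8;i++)for(j=0;j<8;j++)
@     _dst[i*_ystride+j]=clamp(_residue[i*8+j]+128,0,255);
@ The clamp below is branchless: if the ADDS result is negative, LT is
@ set and EORLT XORs 255 with the sign mask (ASR #32 = all ones), so
@ the byte stored by STRB is 0; if CMPGT finds the result above 255,
@ EORLT XORs 255 with 0 (ASR #32 of a positive value) and stores 255.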
STMFD r13!,{r4,r5,r14}
MOV r14,#8
MOV r5, #255
SUB r1, r1, #7
ofrintra_lp_arm:
LDRSH r3, [r2], #2
LDRSH r4, [r2], #2
LDRSH r12,[r2], #2
ADDS r3, r3, #128
CMPGT r5, r3
EORLT r3, r5, r3, ASR #32
STRB r3, [r0], #1
ADDS r4, r4, #128
CMPGT r5, r4
EORLT r4, r5, r4, ASR #32
LDRSH r3, [r2], #2
STRB r4, [r0], #1
ADDS r12,r12,#128
CMPGT r5, r12
EORLT r12,r5, r12,ASR #32
LDRSH r4, [r2], #2
STRB r12,[r0], #1
ADDS r3, r3, #128
CMPGT r5, r3
EORLT r3, r5, r3, ASR #32
LDRSH r12,[r2], #2
STRB r3, [r0], #1
ADDS r4, r4, #128
CMPGT r5, r4
EORLT r4, r5, r4, ASR #32
LDRSH r3, [r2], #2
STRB r4, [r0], #1
ADDS r12,r12,#128
CMPGT r5, r12
EORLT r12,r5, r12,ASR #32
LDRSH r4, [r2], #2
STRB r12,[r0], #1
ADDS r3, r3, #128
CMPGT r5, r3
EORLT r3, r5, r3, ASR #32
STRB r3, [r0], #1
ADDS r4, r4, #128
CMPGT r5, r4
EORLT r4, r5, r4, ASR #32
STRB r4, [r0], r1
SUBS r14,r14,#1
BGT ofrintra_lp_arm
LDMFD r13!,{r4,r5,PC}
.size oc_frag_recon_intra_arm, .-oc_frag_recon_intra_arm @ ENDP
.type oc_frag_recon_inter_arm, %function; oc_frag_recon_inter_arm: @ PROC
@ r0 = unsigned char *dst
@ r1 = const unsigned char *src
@ r2 = int ystride
@ r3 = const ogg_int16_t residue[64]
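@ Reference sketch (not the library's C source):
@   for(i=0;i<8;i++)for(j=0;j<8;j++)
@     dst[i*ystride+j]=clamp(src[i*ystride+j]+residue[i*8+j],0,255);
@ The [0,255] clamp uses the same branchless ADDS/CMPGT/EORLT sequence
@ as oc_frag_recon_intra_arm above.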
STMFD r13!,{r5,r9-r11,r14}
MOV r9, #8
MOV r5, #255
SUB r2, r2, #7
ofrinter_lp_arm:
LDRSH r12,[r3], #2
LDRB r14,[r1], #1
LDRSH r11,[r3], #2
LDRB r10,[r1], #1
ADDS r12,r12,r14
CMPGT r5, r12
EORLT r12,r5, r12,ASR #32
STRB r12,[r0], #1
ADDS r11,r11,r10
CMPGT r5, r11
LDRSH r12,[r3], #2
LDRB r14,[r1], #1
EORLT r11,r5, r11,ASR #32
STRB r11,[r0], #1
ADDS r12,r12,r14
CMPGT r5, r12
LDRSH r11,[r3], #2
LDRB r10,[r1], #1
EORLT r12,r5, r12,ASR #32
STRB r12,[r0], #1
ADDS r11,r11,r10
CMPGT r5, r11
LDRSH r12,[r3], #2
LDRB r14,[r1], #1
EORLT r11,r5, r11,ASR #32
STRB r11,[r0], #1
ADDS r12,r12,r14
CMPGT r5, r12
LDRSH r11,[r3], #2
LDRB r10,[r1], #1
EORLT r12,r5, r12,ASR #32
STRB r12,[r0], #1
ADDS r11,r11,r10
CMPGT r5, r11
LDRSH r12,[r3], #2
LDRB r14,[r1], #1
EORLT r11,r5, r11,ASR #32
STRB r11,[r0], #1
ADDS r12,r12,r14
CMPGT r5, r12
LDRSH r11,[r3], #2
LDRB r10,[r1], r2
EORLT r12,r5, r12,ASR #32
STRB r12,[r0], #1
ADDS r11,r11,r10
CMPGT r5, r11
EORLT r11,r5, r11,ASR #32
STRB r11,[r0], r2
SUBS r9, r9, #1
BGT ofrinter_lp_arm
LDMFD r13!,{r5,r9-r11,PC}
.size oc_frag_recon_inter_arm, .-oc_frag_recon_inter_arm @ ENDP
.type oc_frag_recon_inter2_arm, %function; oc_frag_recon_inter2_arm: @ PROC
@ r0 = unsigned char *dst
@ r1 = const unsigned char *src1
@ r2 = const unsigned char *src2
@ r3 = int ystride
LDR r12,[r13]
@ r12= const ogg_int16_t residue[64]
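@ Reference sketch (not the library's C source):
@   for(i=0;i<8;i++)for(j=0;j<8;j++)
@     dst[i*ystride+j]=clamp(((src1[i*ystride+j]+src2[i*ystride+j])>>1)
@      +residue[i*8+j],0,255);
@ The clamp again uses the branchless ADDS/CMPGT/EORLT sequence.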
STMFD r13!,{r4-r8,r14}
MOV r14,#8
MOV r8, #255
SUB r3, r3, #7
ofrinter2_lp_arm:
LDRB r5, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
LDRB r7, [r1], #1
ADD r5, r5, r6
ADDS r5, r4, r5, LSR #1
CMPGT r8, r5
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r5, r8, r5, ASR #32
STRB r5, [r0], #1
ADD r7, r7, r6
ADDS r7, r4, r7, LSR #1
CMPGT r8, r7
LDRB r5, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r7, r8, r7, ASR #32
STRB r7, [r0], #1
ADD r5, r5, r6
ADDS r5, r4, r5, LSR #1
CMPGT r8, r5
LDRB r7, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r5, r8, r5, ASR #32
STRB r5, [r0], #1
ADD r7, r7, r6
ADDS r7, r4, r7, LSR #1
CMPGT r8, r7
LDRB r5, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r7, r8, r7, ASR #32
STRB r7, [r0], #1
ADD r5, r5, r6
ADDS r5, r4, r5, LSR #1
CMPGT r8, r5
LDRB r7, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r5, r8, r5, ASR #32
STRB r5, [r0], #1
ADD r7, r7, r6
ADDS r7, r4, r7, LSR #1
CMPGT r8, r7
LDRB r5, [r1], #1
LDRB r6, [r2], #1
LDRSH r4, [r12],#2
EORLT r7, r8, r7, ASR #32
STRB r7, [r0], #1
ADD r5, r5, r6
ADDS r5, r4, r5, LSR #1
CMPGT r8, r5
LDRB r7, [r1], r3
LDRB r6, [r2], r3
LDRSH r4, [r12],#2
EORLT r5, r8, r5, ASR #32
STRB r5, [r0], #1
ADD r7, r7, r6
ADDS r7, r4, r7, LSR #1
CMPGT r8, r7
EORLT r7, r8, r7, ASR #32
STRB r7, [r0], r3
SUBS r14,r14,#1
BGT ofrinter2_lp_arm
LDMFD r13!,{r4-r8,PC}
.size oc_frag_recon_inter2_arm, .-oc_frag_recon_inter2_arm @ ENDP
.if OC_ARM_ASM_EDSP
.global oc_frag_copy_list_edsp
.type oc_frag_copy_list_edsp, %function; oc_frag_copy_list_edsp: @ PROC
@ r0 = _dst_frame
@ r1 = _src_frame
@ r2 = _ystride
@ r3 = _fragis
@ <> = _nfragis
@ <> = _frag_buf_offs
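@ Same operation as oc_frag_copy_list_arm, but each 8-byte row moves in
@ a single LDRD/STRD pair. LDRD/STRD need doubleword-aligned addresses,
@ so this variant relies on fragments being 8-byte aligned within both
@ frame buffers.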
LDR r12,[r13] @ r12 = _nfragis
STMFD r13!,{r4-r11,r14}
SUBS r12, r12, #1
LDRGE r5, [r3],#4 @ r5 = _fragis[fragii]
LDRGE r14,[r13,#4*10] @ r14 = _frag_buf_offs
BLT ofcl_edsp_end
ofcl_edsp_lp:
MOV r4, r1
LDR r5, [r14,r5, LSL #2] @ r5 = _frag_buf_offs[_fragis[fragii]]
SUBS r12, r12, #1
@ Stall (on XScale)
LDRD r6, [r4, r5]! @ r4 = _src_frame+frag_buf_off
LDRD r8, [r4, r2]!
@ Stall
STRD r6, [r5, r0]! @ r5 = _dst_frame+frag_buf_off
STRD r8, [r5, r2]!
@ Stall
LDRD r6, [r4, r2]! @ On Xscale at least, doing 3 consecutive
LDRD r8, [r4, r2]! @ loads causes a stall, but that's no worse
LDRD r10,[r4, r2]! @ than us only doing 2, and having to do
@ another pair of LDRD/STRD later on.
@ Stall
STRD r6, [r5, r2]!
STRD r8, [r5, r2]!
STRD r10,[r5, r2]!
LDRD r6, [r4, r2]!
LDRD r8, [r4, r2]!
LDRD r10,[r4, r2]!
STRD r6, [r5, r2]!
STRD r8, [r5, r2]!
STRD r10,[r5, r2]!
LDRGE r5, [r3],#4 @ r5 = _fragis[fragii]
BGE ofcl_edsp_lp
ofcl_edsp_end:
LDMFD r13!,{r4-r11,PC}
.size oc_frag_copy_list_edsp, .-oc_frag_copy_list_edsp @ ENDP
.endif
.if OC_ARM_ASM_MEDIA
.global oc_frag_recon_intra_v6
.global oc_frag_recon_inter_v6
.global oc_frag_recon_inter2_v6
.type oc_frag_recon_intra_v6, %function; oc_frag_recon_intra_v6: @ PROC
@ r0 = unsigned char *_dst
@ r1 = int _ystride
@ r2 = const ogg_int16_t _residue[64]
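@ ARMv6 SIMD variant of oc_frag_recon_intra_arm: LDRD fetches four
@ residues at a time, QADD16 adds 128 to each 16-bit lane with signed
@ saturation, USAT16 clamps each lane to [0,255], and ORR/PKHBT pack
@ the bytes so a whole 8-pixel row is written with one STRD.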
STMFD r13!,{r4-r6,r14}
MOV r14,#8
MOV r12,r2
LDR r6, =0x00800080
ofrintra_v6_lp:
LDRD r2, [r12],#8 @ r2 = 11110000 r3 = 33332222
LDRD r4, [r12],#8 @ r4 = 55554444 r5 = 77776666
SUBS r14,r14,#1
QADD16 r2, r2, r6
QADD16 r3, r3, r6
QADD16 r4, r4, r6
QADD16 r5, r5, r6
USAT16 r2, #8, r2 @ r2 = __11__00
USAT16 r3, #8, r3 @ r3 = __33__22
USAT16 r4, #8, r4 @ r4 = __55__44
USAT16 r5, #8, r5 @ r5 = __77__66
ORR r2, r2, r2, LSR #8 @ r2 = __111100
ORR r3, r3, r3, LSR #8 @ r3 = __333322
ORR r4, r4, r4, LSR #8 @ r4 = __555544
ORR r5, r5, r5, LSR #8 @ r5 = __777766
PKHBT r2, r2, r3, LSL #16 @ r2 = 33221100
PKHBT r3, r4, r5, LSL #16 @ r3 = 77665544
STRD r2, r3, [r0], r1
BGT ofrintra_v6_lp
LDMFD r13!,{r4-r6,PC}
.size oc_frag_recon_intra_v6, .-oc_frag_recon_intra_v6 @ ENDP
.type oc_frag_recon_inter_v6, %function; oc_frag_recon_inter_v6: @ PROC
@ r0 = unsigned char *_dst
@ r1 = const unsigned char *_src
@ r2 = int _ystride
@ r3 = const ogg_int16_t _residue[64]
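@ ARMv6 SIMD variant of oc_frag_recon_inter_arm: each source row is
@ loaded 8 bytes at a time (unaligned LDRD where the target allows it),
@ unpacked to 16-bit lanes with UXTB16, added to the residues with
@ saturating QADD16, clamped to [0,255] with USAT16, and repacked.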
STMFD r13!,{r4-r7,r14}
MOV r14,#8
ofrinter_v6_lp:
LDRD r6, [r3], #8 @ r6 = 11110000 r7 = 33332222
SUBS r14,r14,#1
.if OC_ARM_CAN_UNALIGN_LDRD
LDRD r4, [r1], r2 @ Unaligned ; r4 = 33221100 r5 = 77665544
.else
LDR r5, [r1, #4]
LDR r4, [r1], r2
.endif
PKHBT r12,r6, r7, LSL #16 @ r12= 22220000
PKHTB r7, r7, r6, ASR #16 @ r7 = 33331111
UXTB16 r6,r4 @ r6 = __22__00
UXTB16 r4,r4, ROR #8 @ r4 = __33__11
QADD16 r12,r12,r6 @ r12= xx22xx00
QADD16 r4, r7, r4 @ r4 = xx33xx11
LDRD r6, [r3], #8 @ r6 = 55554444 r7 = 77776666
USAT16 r4, #8, r4 @ r4 = __33__11
USAT16 r12,#8,r12 @ r12= __22__00
ORR r4, r12,r4, LSL #8 @ r4 = 33221100
PKHBT r12,r6, r7, LSL #16 @ r12= 66664444
PKHTB r7, r7, r6, ASR #16 @ r7 = 77775555
UXTB16 r6,r5 @ r6 = __66__44
UXTB16 r5,r5, ROR #8 @ r5 = __77__55
QADD16 r12,r12,r6 @ r12= xx66xx44
QADD16 r5, r7, r5 @ r5 = xx77xx55
USAT16 r12,#8, r12 @ r12= __66__44
USAT16 r5, #8, r5 @ r5 = __77__55
ORR r5, r12,r5, LSL #8 @ r5 = 77665544
STRD r4, r5, [r0], r2
BGT ofrinter_v6_lp
LDMFD r13!,{r4-r7,PC}
.size oc_frag_recon_inter_v6, .-oc_frag_recon_inter_v6 @ ENDP
.type oc_frag_recon_inter2_v6, %function; oc_frag_recon_inter2_v6: @ PROC
@ r0 = unsigned char *_dst
@ r1 = const unsigned char *_src1
@ r2 = const unsigned char *_src2
@ r3 = int _ystride
LDR r12,[r13]
@ r12= const ogg_int16_t _residue[64]
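@ ARMv6 SIMD variant of oc_frag_recon_inter2_arm: UHADD8 averages the
@ two predictors byte-wise ((src1+src2)>>1 without overflow); the rest
@ follows oc_frag_recon_inter_v6: UXTB16 unpack, QADD16, USAT16, pack.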
STMFD r13!,{r4-r9,r14}
MOV r14,#8
ofrinter2_v6_lp:
LDRD r6, [r12,#8] @ r6 = 55554444 r7 = 77776666
SUBS r14,r14,#1
LDR r4, [r1, #4] @ Unaligned ; r4 = src1[1] = 77665544
LDR r5, [r2, #4] @ Unaligned ; r5 = src2[1] = 77665544
PKHBT r8, r6, r7, LSL #16 @ r8 = 66664444
PKHTB r9, r7, r6, ASR #16 @ r9 = 77775555
UHADD8 r4, r4, r5 @ r4 = (src1[7,6,5,4] + src2[7,6,5,4])>>1
UXTB16 r5, r4 @ r5 = __66__44
UXTB16 r4, r4, ROR #8 @ r4 = __77__55
QADD16 r8, r8, r5 @ r8 = xx66xx44
QADD16 r9, r9, r4 @ r9 = xx77xx55
LDRD r6,[r12],#16 @ r6 = 33332222 r7 = 11110000
USAT16 r8, #8, r8 @ r8 = __66__44
LDR r4, [r1], r3 @ Unaligned ; r4 = src1[0] = 33221100
USAT16 r9, #8, r9 @ r9 = __77__55
LDR r5, [r2], r3 @ Unaligned ; r5 = src2[0] = 33221100
ORR r9, r8, r9, LSL #8 @ r9 = 77665544
PKHBT r8, r6, r7, LSL #16 @ r8 = 22220000
UHADD8 r4, r4, r5 @ r4 = (src1[3,2,1,0] + src2[3,2,1,0])>>1
PKHTB r7, r7, r6, ASR #16 @ r7 = 33331111
UXTB16 r5, r4 @ r5 = __22__00
UXTB16 r4, r4, ROR #8 @ r4 = __33__11
QADD16 r8, r8, r5 @ r8 = xx22xx00
QADD16 r7, r7, r4 @ r7 = xx33xx11
USAT16 r8, #8, r8 @ r8 = __22__00
USAT16 r7, #8, r7 @ r7 = __33__11
ORR r8, r8, r7, LSL #8 @ r8 = 33221100
STRD r8, r9, [r0], r3
BGT ofrinter2_v6_lp
LDMFD r13!,{r4-r9,PC}
.size oc_frag_recon_inter2_v6, .-oc_frag_recon_inter2_v6 @ ENDP
.endif
.if OC_ARM_ASM_NEON
.global oc_frag_copy_list_neon
.global oc_frag_recon_intra_neon
.global oc_frag_recon_inter_neon
.global oc_frag_recon_inter2_neon
.type oc_frag_copy_list_neon, %function; oc_frag_copy_list_neon: @ PROC
@ r0 = _dst_frame
@ r1 = _src_frame
@ r2 = _ystride
@ r3 = _fragis
@ <> = _nfragis
@ <> = _frag_buf_offs
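@ NEON variant of oc_frag_copy_list_arm: each 8x8 fragment is loaded
@ whole into D0-D7 with VLD1.64 and written back with VST1.64, while
@ PLD prefetches the rows of the next fragment in the list.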
LDR r12,[r13] @ r12 = _nfragis
STMFD r13!,{r4-r7,r14}
CMP r12, #1
LDRGE r6, [r3] @ r6 = _fragis[fragii]
LDRGE r14,[r13,#4*6] @ r14 = _frag_buf_offs
BLT ofcl_neon_end
@ Stall (2 on Xscale)
LDR r6, [r14,r6, LSL #2] @ r6 = _frag_buf_offs[_fragis[fragii]]
@ Stall (on XScale)
MOV r7, r6 @ Guarantee PLD points somewhere valid.
ofcl_neon_lp:
ADD r4, r1, r6
VLD1.64 {D0}, [r4,:64], r2
ADD r5, r0, r6
VLD1.64 {D1}, [r4,:64], r2
SUBS r12, r12, #1
VLD1.64 {D2}, [r4,:64], r2
LDRGT r6, [r3,#4]! @ r6 = _fragis[fragii]
VLD1.64 {D3}, [r4,:64], r2
LDRGT r6, [r14,r6, LSL #2] @ r6 = _frag_buf_offs[_fragis[fragii]]
VLD1.64 {D4}, [r4,:64], r2
ADDGT r7, r1, r6
VLD1.64 {D5}, [r4,:64], r2
PLD [r7]
VLD1.64 {D6}, [r4,:64], r2
PLD [r7, r2]
VLD1.64 {D7}, [r4,:64]
PLD [r7, r2, LSL #1]
VST1.64 {D0}, [r5,:64], r2
ADDGT r7, r7, r2, LSL #2
VST1.64 {D1}, [r5,:64], r2
PLD [r7, -r2]
VST1.64 {D2}, [r5,:64], r2
PLD [r7]
VST1.64 {D3}, [r5,:64], r2
PLD [r7, r2]
VST1.64 {D4}, [r5,:64], r2
PLD [r7, r2, LSL #1]
VST1.64 {D5}, [r5,:64], r2
ADDGT r7, r7, r2, LSL #2
VST1.64 {D6}, [r5,:64], r2
PLD [r7, -r2]
VST1.64 {D7}, [r5,:64]
BGT ofcl_neon_lp
ofcl_neon_end:
LDMFD r13!,{r4-r7,PC}
.size oc_frag_copy_list_neon, .-oc_frag_copy_list_neon @ ENDP
.type oc_frag_recon_intra_neon, %function; oc_frag_recon_intra_neon: @ PROC
@ r0 = unsigned char *_dst
@ r1 = int _ystride
@ r2 = const ogg_int16_t _residue[64]
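@ NEON variant of oc_frag_recon_intra_arm: the 64 residues are loaded
@ into Q8-Q15, VQADD.S16 adds 128 to every lane with saturation, and
@ VQMOVUN.S16 narrows each lane to an unsigned byte with saturation,
@ which is the [0,255] clamp; one 8-pixel row is stored per VST1.64.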
VMOV.I16 Q0, #128
VLDMIA r2, {D16-D31} @ D16= 3333222211110000 etc ; 9(8) cycles
VQADD.S16 Q8, Q8, Q0
VQADD.S16 Q9, Q9, Q0
VQADD.S16 Q10,Q10,Q0
VQADD.S16 Q11,Q11,Q0
VQADD.S16 Q12,Q12,Q0
VQADD.S16 Q13,Q13,Q0
VQADD.S16 Q14,Q14,Q0
VQADD.S16 Q15,Q15,Q0
VQMOVUN.S16 D16,Q8 @ D16= 7766554433221100 ; 1 cycle
VQMOVUN.S16 D17,Q9 @ D17= FFEEDDCCBBAA9988 ; 1 cycle
VQMOVUN.S16 D18,Q10 @ D18= NNMMLLKKJJIIHHGG ; 1 cycle
VST1.64 {D16},[r0,:64], r1
VQMOVUN.S16 D19,Q11 @ D19= VVUUTTSSRRQQPPOO ; 1 cycle
VST1.64 {D17},[r0,:64], r1
VQMOVUN.S16 D20,Q12 @ D20= ddccbbaaZZYYXXWW ; 1 cycle
VST1.64 {D18},[r0,:64], r1
VQMOVUN.S16 D21,Q13 @ D21= llkkjjiihhggffee ; 1 cycle
VST1.64 {D19},[r0,:64], r1
VQMOVUN.S16 D22,Q14 @ D22= ttssrrqqppoonnmm ; 1 cycle
VST1.64 {D20},[r0,:64], r1
VQMOVUN.S16 D23,Q15 @ D23= ||{{zzyyxxwwvvuu ; 1 cycle
VST1.64 {D21},[r0,:64], r1
VST1.64 {D22},[r0,:64], r1
VST1.64 {D23},[r0,:64], r1
MOV PC,R14
.size oc_frag_recon_intra_neon, .-oc_frag_recon_intra_neon @ ENDP
.type oc_frag_recon_inter_neon, %function; oc_frag_recon_inter_neon: @ PROC
@ r0 = unsigned char *_dst
@ r1 = const unsigned char *_src
@ r2 = int _ystride
@ r3 = const ogg_int16_t _residue[64]
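@ NEON variant of oc_frag_recon_inter_arm: source rows are widened from
@ bytes to 16-bit lanes with VMOVL.U8, added to the residues with
@ saturating VQADD.S16, then narrowed back to bytes with VQMOVUN.S16,
@ which performs the [0,255] clamp.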
VLDMIA r3, {D16-D31} @ D16= 3333222211110000 etc ; 9(8) cycles
VLD1.64 {D0}, [r1], r2
VLD1.64 {D2}, [r1], r2
VMOVL.U8 Q0, D0 @ Q0 = __77__66__55__44__33__22__11__00
VLD1.64 {D4}, [r1], r2
VMOVL.U8 Q1, D2 @ etc
VLD1.64 {D6}, [r1], r2
VMOVL.U8 Q2, D4
VMOVL.U8 Q3, D6
VQADD.S16 Q8, Q8, Q0
VLD1.64 {D0}, [r1], r2
VQADD.S16 Q9, Q9, Q1
VLD1.64 {D2}, [r1], r2
VQADD.S16 Q10,Q10,Q2
VLD1.64 {D4}, [r1], r2
VQADD.S16 Q11,Q11,Q3
VLD1.64 {D6}, [r1], r2
VMOVL.U8 Q0, D0
VMOVL.U8 Q1, D2
VMOVL.U8 Q2, D4
VMOVL.U8 Q3, D6
VQADD.S16 Q12,Q12,Q0
VQADD.S16 Q13,Q13,Q1
VQADD.S16 Q14,Q14,Q2
VQADD.S16 Q15,Q15,Q3
VQMOVUN.S16 D16,Q8
VQMOVUN.S16 D17,Q9
VQMOVUN.S16 D18,Q10
VST1.64 {D16},[r0,:64], r2
VQMOVUN.S16 D19,Q11
VST1.64 {D17},[r0,:64], r2
VQMOVUN.S16 D20,Q12
VST1.64 {D18},[r0,:64], r2
VQMOVUN.S16 D21,Q13
VST1.64 {D19},[r0,:64], r2
VQMOVUN.S16 D22,Q14
VST1.64 {D20},[r0,:64], r2
VQMOVUN.S16 D23,Q15
VST1.64 {D21},[r0,:64], r2
VST1.64 {D22},[r0,:64], r2
VST1.64 {D23},[r0,:64], r2
MOV PC,R14
.size oc_frag_recon_inter_neon, .-oc_frag_recon_inter_neon @ ENDP
.type oc_frag_recon_inter2_neon, %function; oc_frag_recon_inter2_neon: @ PROC
@ r0 = unsigned char *_dst
@ r1 = const unsigned char *_src1
@ r2 = const unsigned char *_src2
@ r3 = int _ystride
LDR r12,[r13]
@ r12= const ogg_int16_t _residue[64]
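@ NEON variant of oc_frag_recon_inter2_arm: VHADD.U8 averages the two
@ predictors byte-wise without overflow; the averaged rows are then
@ widened with VMOVL.U8, added to the residues with VQADD.S16, and
@ narrowed with VQMOVUN.S16 as in oc_frag_recon_inter_neon.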
VLDMIA r12,{D16-D31}
VLD1.64 {D0}, [r1], r3
VLD1.64 {D4}, [r2], r3
VLD1.64 {D1}, [r1], r3
VLD1.64 {D5}, [r2], r3
VHADD.U8 Q2, Q0, Q2 @ Q2 = FFEEDDCCBBAA99887766554433221100
VLD1.64 {D2}, [r1], r3
VLD1.64 {D6}, [r2], r3
VMOVL.U8 Q0, D4 @ Q0 = __77__66__55__44__33__22__11__00
VLD1.64 {D3}, [r1], r3
VMOVL.U8 Q2, D5 @ etc
VLD1.64 {D7}, [r2], r3
VHADD.U8 Q3, Q1, Q3
VQADD.S16 Q8, Q8, Q0
VQADD.S16 Q9, Q9, Q2
VLD1.64 {D0}, [r1], r3
VMOVL.U8 Q1, D6
VLD1.64 {D4}, [r2], r3
VMOVL.U8 Q3, D7
VLD1.64 {D1}, [r1], r3
VQADD.S16 Q10,Q10,Q1
VLD1.64 {D5}, [r2], r3
VQADD.S16 Q11,Q11,Q3
VLD1.64 {D2}, [r1], r3
VHADD.U8 Q2, Q0, Q2
VLD1.64 {D6}, [r2], r3
VLD1.64 {D3}, [r1], r3
VMOVL.U8 Q0, D4
VLD1.64 {D7}, [r2], r3
VMOVL.U8 Q2, D5
VHADD.U8 Q3, Q1, Q3
VQADD.S16 Q12,Q12,Q0
VQADD.S16 Q13,Q13,Q2
VMOVL.U8 Q1, D6
VMOVL.U8 Q3, D7
VQADD.S16 Q14,Q14,Q1
VQADD.S16 Q15,Q15,Q3
VQMOVUN.S16 D16,Q8
VQMOVUN.S16 D17,Q9
VQMOVUN.S16 D18,Q10
VST1.64 {D16},[r0,:64], r3
VQMOVUN.S16 D19,Q11
VST1.64 {D17},[r0,:64], r3
VQMOVUN.S16 D20,Q12
VST1.64 {D18},[r0,:64], r3
VQMOVUN.S16 D21,Q13
VST1.64 {D19},[r0,:64], r3
VQMOVUN.S16 D22,Q14
VST1.64 {D20},[r0,:64], r3
VQMOVUN.S16 D23,Q15
VST1.64 {D21},[r0,:64], r3
VST1.64 {D22},[r0,:64], r3
VST1.64 {D23},[r0,:64], r3
MOV PC,R14
.size oc_frag_recon_inter2_neon, .-oc_frag_recon_inter2_neon @ ENDP
.endif
@ END
.section .note.GNU-stack,"",%progbits