#// Copyright 2010 The Code Cavern
#//
#// bda_copyd(bda_ptr,bda_srcptr,bda_len)
#//
#// Copies bda_len 64-bit words from bda_srcptr to bda_ptr, starting with the
#// highest-addressed words and finishing with the lowest.
#//
#// Syntax: GNU as, AT&T operand order (source first, destination last).
#// In:     rdi = bda_ptr     (destination)
#//         rsi = bda_srcptr  (source)
#//         rdx = bda_len     (number of 8-byte words, treated as unsigned)
#// Out:    no result is produced; rax is only used as a copy temporary.
#// Clobb:  rax, rdx, rsi, rdi, xmm0, xmm1, flags
#// Stack:  none (leaf routine, no calls)
#// Notes:  - Only movdqu and 64-bit mov are used, so neither pointer needs
#//           any particular alignment.
#//         - Within the loop body and within each tail case every load is
#//           issued before the first store, and the walk is top-down, so
#//           overlapping buffers are copied intact when the destination
#//           does not lie below the source.
#//         - bda_len = 0 returns without touching memory.

        .text
        .global bda_copyd
bda_copyd:
        lea     16(%rsi),%rsi                   #// bias both pointers by two words so that
        lea     16(%rdi),%rdi                   #// (ptr,rdx,8) addresses word rdx+2
        sub     $4,%rdx                         #// rdx = len-4; borrow (CF=1) means len < 4
        jc      .Lbda_copyd_tail

#// Main loop: four words per pass. On entry rdx = (words still to copy) - 4.
        .p2align 4
.Lbda_copyd_loop:
        movdqu  (%rsi,%rdx,8),%xmm0             #// upper two words of this group
        movdqu  -16(%rsi,%rdx,8),%xmm1          #// lower two words of this group
        sub     $4,%rdx                         #// step down one group; CF drives the jnc below
        movdqu  %xmm1,-16+32(%rdi,%rdx,8)       #// +32 compensates for the sub just done;
        movdqu  %xmm0,32(%rdi,%rdx,8)           #// movdqu leaves the flags alone
        jnc     .Lbda_copyd_loop                #// no borrow: at least four words remained

#// Tail: rdx is now (words left) - 4, i.e. one of -4, -3, -2, -1.
#// cmp computes rdx+2, giving -2, -1, 0 or 1:
#//    1 -> greater                    -> three words left
#//    0 -> equal                      -> two words left
#//   -1 -> low byte 0xFF, parity set  -> one word left (fall through)
#//   -2 -> low byte 0xFE, parity clear-> nothing left
.Lbda_copyd_tail:
        cmp     $-2,%rdx
        jg      .Lbda_copyd_rem3
        je      .Lbda_copyd_rem2
        jnp     .Lbda_copyd_rem0

.Lbda_copyd_rem1:                               #// rdx = -3: word 0 only
        mov     8(%rsi,%rdx,8),%rax
        mov     %rax,8(%rdi,%rdx,8)
.Lbda_copyd_rem0:
        ret

.Lbda_copyd_rem3:                               #// rdx = -1: words 0, 1 and 2
        movdqu  (%rsi,%rdx,8),%xmm0             #// words 1 and 2
        mov     -8(%rsi,%rdx,8),%rax            #// word 0
        mov     %rax,-8(%rdi,%rdx,8)
        movdqu  %xmm0,(%rdi,%rdx,8)
        ret

.Lbda_copyd_rem2:                               #// rdx = -2: words 0 and 1
        movdqu  (%rsi,%rdx,8),%xmm0
        movdqu  %xmm0,(%rdi,%rdx,8)
        ret