SPRAC21A June 2016 – June 2019 OMAP-L132 , OMAP-L138 , TDA2E , TDA2EG-17 , TDA2HF , TDA2HG , TDA2HV , TDA2LF , TDA2P-ABZ , TDA2P-ACD , TDA2SA , TDA2SG , TDA2SX , TDA3LA , TDA3LX , TDA3MA , TDA3MD , TDA3MV
Following are the three functions used for Cortex-A15 operations:
The C code for the write function is:
/*
 * memWrite - fill memory with a fixed 32-bit pattern using an unrolled loop.
 *
 * DstAddr   : start address of the region to write, passed as an integer and
 *             cast to a word pointer.
 * transSize : loop bound. The counter advances by 128 for every pass of 128
 *             word stores, so the bound is treated as a count of 32-bit words
 *             and the amount written is rounded up to a multiple of 128 words.
 *
 * The "..." lines stand for the elided stores of Word 6 through Word 126.
 */
void memWrite (UWORD32 DstAddr, UWORD32 transSize) {
/* Constant pattern stored to every word. */
register UWORD32 wrData=0xA5B5C5D5;
register UWORD32 i_wr;
/* volatile: every store below must be emitted as a real memory access. */
register volatile UWORD32 *wrAddr;
wrAddr = (UWORD32*)DstAddr;
for(i_wr=0; i_wr<transSize; i_wr+=128) {
/* One pass = 128 consecutive word stores, unrolled to minimise loop overhead. */
*wrAddr++ =wrData; /*Word 1*/
*wrAddr++ =wrData; /*Word 2*/
*wrAddr++ =wrData; /*Word 3*/
*wrAddr++ =wrData; /*Word 4*/
*wrAddr++ =wrData; /*Word 5*/
...
...
...
*wrAddr++ =wrData; /*Word 127*/
*wrAddr++ =wrData; /*Word 128*/
}
}
The C code for the read function is:
/*
 * memRead - read memory with an unrolled loop and discard the data.
 *
 * SrcAddr   : start address of the region to read, passed as an integer and
 *             cast to a word pointer.
 * transSize : loop bound. The counter advances by 128 for every pass of 128
 *             word loads, so the bound is treated as a count of 32-bit words
 *             and the amount read is rounded up to a multiple of 128 words.
 *
 * Each statement is a load (rdData = *rdAddr++). Memory is never written by
 * this function. The "..." lines stand for the elided loads of Word 6
 * through Word 126.
 */
void memRead (UWORD32 SrcAddr, UWORD32 transSize) {
    register UWORD32 rdData;
    register UWORD32 i_rd;
    /* volatile: every load below must be emitted as a real memory access,
       even though the loaded value is never used. */
    register volatile UWORD32 *rdAddr;
    rdAddr = (UWORD32*)SrcAddr;
    for(i_rd=0; i_rd<transSize; i_rd+=128) {
        /* One pass = 128 consecutive word loads, unrolled to minimise loop overhead. */
        rdData = *rdAddr++; /*Word 1*/
        rdData = *rdAddr++; /*Word 2*/
        rdData = *rdAddr++; /*Word 3*/
        rdData = *rdAddr++; /*Word 4*/
        rdData = *rdAddr++; /*Word 5*/
        ...
        ...
        ...
        rdData = *rdAddr++; /*Word 127*/
        rdData = *rdAddr++; /*Word 128*/
    }
    (void)rdData; /* value intentionally discarded; silences set-but-unused warnings */
}
The C code for the copy function is:
/*
 * memCopy - copy memory word by word using an unrolled loop.
 *
 * SrcAddr   : start address of the source region (integer, cast to a word pointer).
 * DstAddr   : start address of the destination region (integer, cast to a word pointer).
 * transSize : loop bound. The counter advances by 32 for every pass of 32
 *             word copies, so the bound is treated as a count of 32-bit words
 *             and the amount copied is rounded up to a multiple of 32 words.
 *
 * The "..." lines stand for the elided copies of Word 6 through Word 29.
 */
void memCopy (UWORD32 SrcAddr, UWORD32 DstAddr, UWORD32 transSize) {
    /* volatile: every load and store below must be emitted as a real memory access. */
    register volatile UWORD32 *rdAddr, *wrAddr;
    register UWORD32 i;
    rdAddr = (UWORD32*)SrcAddr;
    wrAddr = (UWORD32*)DstAddr;
    for(i=0; i<transSize; i=i+32) {
        /* One pass = 32 consecutive word copies, unrolled to minimise loop overhead. */
        *wrAddr++ = *rdAddr++; /*Word 1*/
        *wrAddr++ = *rdAddr++; /*Word 2*/
        *wrAddr++ = *rdAddr++; /*Word 3*/
        *wrAddr++ = *rdAddr++; /*Word 4*/
        *wrAddr++ = *rdAddr++; /*Word 5*/
        ...
        ...
        ...
        *wrAddr++ = *rdAddr++; /*Word 30*/
        *wrAddr++ = *rdAddr++; /*Word 31*/
        *wrAddr++ = *rdAddr++; /*Word 32*/
    }
}
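Additionally, an optimized assembly copy routine is used; it was found to give the highest memory copy performance. The routine stores through its first argument (r0) and loads through its second argument (r1), so the destination buffer is passed first. Its prototype is:
void memcpy_arm( UWORD32 destBuffer, UWORD32 srcBuffer, UWORD32 numBytes );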
/*----------------------------------------------------------------------
 * memcpy_arm - byte-granular memory copy, 32-bit ARM state, pre-UAL
 *              mnemonics (condition code before the size/S suffix).
 *
 * In:    r0 = destination address (every store goes through r0)
 *        r1 = source address      (every load goes through r1)
 *        r2 = number of bytes to copy
 * Out:   no defined return value; r0 and r1 are left advanced past the
 *        copied data.
 * Clobb: r0-r3, r12, flags. r4-r8 and lr are used on the word-aligned
 *        path and are saved/restored there (24 bytes of stack).
 *
 * Strategy:
 *   1. n <= 3                 -> copy the bytes individually.
 *   2. destination unaligned  -> copy 1-3 bytes to word-align r0.
 *   3. source also aligned    -> 32-byte LDM/STM blocks with PLD prefetch.
 *   4. source not aligned     -> word loads from the unaligned source
 *      (NOTE(review): relies on unaligned LDR being permitted for the
 *      memory being read - confirm for the target configuration).
 *--------------------------------------------------------------------*/
        .text
        .global memcpy_arm

memcpy_arm:
        CMP     r2,#3
        BLS     _my_memcpy_lastbytes        /* 0..3 bytes: byte copy only */
        ANDS    r12,r0,#3                   /* r12 = destination misalignment */
        BEQ     .Ldst_aligned

        /* Copy 4 - r12 bytes so that r0 becomes word aligned:
         *   always one byte, a second if r12 <= 2 (LS), a third if r12 < 2 (CC).
         * Flags come from the CMP; ADD/SUB without S leave them untouched. */
        LDRB    r3,[r1],#1
        CMP     r12,#2
        ADD     r2,r2,r12                   /* together with the SUB below: */
        LDRLSB  r12,[r1],#1                 /*   r2 -= (4 - misalignment)   */
        STRB    r3,[r0],#1
        LDRCCB  r3,[r1],#1
        STRLSB  r12,[r0],#1
        SUB     r2,r2,#4
        STRCCB  r3,[r0],#1

.Ldst_aligned:
        ANDS    r3,r1,#3                    /* is the source word aligned too? */
        BEQ     __my_aeabi_memcpy4

        /* Destination aligned, source not: copy two words per pass. */
.Lword_pair_loop:
        SUBS    r2,r2,#8
        BCC     .Lword_pair_done            /* fewer than 8 bytes left */
        LDR     r3,[r1],#4
        LDR     r12,[r1],#4
        STR     r3,[r0],#4
        STR     r12,[r0],#4
        B       .Lword_pair_loop

.Lword_pair_done:
        ADDS    r2,r2,#4                    /* r2 = remaining - 4 */
        LDRPL   r3,[r1],#4                  /* PL: at least 4 bytes remained */
        STRPL   r3,[r0],#4
        MOV     r0,r0                       /* no-op, kept from the original */

        /* Copy the last 0..3 bytes. Only the low two bits of r2 matter:
         * LSLS #31 puts bit 1 into C (two bytes) and bit 0 into N (one byte). */
_my_memcpy_lastbytes:
        LSLS    r2,r2,#31
        LDRCSB  r3,[r1],#1
        LDRCSB  r12,[r1],#1
        LDRMIB  r2,[r1],#1                  /* r2 is free now; flags already set */
        STRCSB  r3,[r0],#1
        STRCSB  r12,[r0],#1
        STRMIB  r2,[r0],#1
        BX      lr

        /* Word-aligned path: r0 and r1 both word aligned, r2 = byte count. */
__my_aeabi_memcpy4:
__my_aeabi_memcpy8:
__my_rt_memcpy_w:
        PUSH    {r4-r8,lr}                  /* lr is used as a data register below */
        SUBS    r2,r2,#0x20                 /* r2 = count - 32 */
        BCC     .Lblock_tail                /* fewer than 32 bytes in total */
        DSB                                 /* barrier before the prefetch burst
                                               (kept from the original; rationale
                                               not documented in this file) */
        PLD     [r1, #0x20]
        PLD     [r1, #0x40]
        PLD     [r1, #0x60]
        PLD     [r1, #0x80]
        PLD     [r1, #0xa0]
        PLD     [r1, #0xc0]
        PLD     [r1, #0xe0]

        /* Invariant at the loop head: C is set and at least 32 bytes remain
         * (r2 = remaining - 32 >= 0). PLD does not alter the flags. */
.Lblock_loop:
        PLD     [r1,#0x100]                 /* keep prefetching 256 bytes ahead */
        LDMCS   r1!,{r3-r8,r12,lr}          /* load 32 bytes (CS always true here) */
        SUBCSS  r2,r2,#0x20                 /* C now says whether ANOTHER block follows */
        STM     r0!,{r3-r8,r12,lr}          /* Unconditional on purpose: the block just
                                               loaded must always be stored. A store
                                               conditional on CS would be skipped on the
                                               final pass (the SUBS above clears C),
                                               dropping the last 32-byte block and leaving
                                               r1 32 bytes ahead of r0 for the tail. */
        BCS     .Lblock_loop

        /* r2 = remaining - 32 (negative); its low five bits equal the number
         * of bytes still to copy (0..31). */
.Lblock_tail:
        LSLS    r12,r2,#28                  /* C = bit 4 (16 bytes), N = bit 3 (8 bytes) */
        LDMCS   r1!,{r3,r4,r12,lr}
        STMCS   r0!,{r3,r4,r12,lr}
        LDMMI   r1!,{r3,r4}
        STMMI   r0!,{r3,r4}
        POP     {r4-r8,lr}                  /* restore before the remaining exits */
        LSLS    r12,r2,#30                  /* C = bit 2 (4 bytes), Z = no 1..3 byte tail */
        LDRCS   r3,[r1],#4
        STRCS   r3,[r0],#4
        BXEQ    lr                          /* nothing left: done */

        /* Last 1..3 bytes with both pointers still halfword aligned. */
_my_memcpy_lastbytes_aligned:
        LSLS    r2,r2,#31                   /* C = bit 1 (halfword), N = bit 0 (byte) */
        LDRCSH  r3,[r1],#2
        LDRMIB  r2,[r1],#1
        STRCSH  r3,[r0],#2
        STRMIB  r2,[r0],#1
        BX      lr
        .end