ZHCUAQ1F July 2015 – April 2023
;-----------------------------------------------------------------------
; _dotp -- dot product of two arrays (TMS320C6000 linear assembly).
;
; In:   a_0 = address of the first input array
;       b_0 = address of the second input array
; Out:  sum = sum of products, handed back through .return
;
; The element count is fixed at 100 (MVK 100, cnt).  Each pass of the
; loop consumes four elements from each array and subtracts 4 from
; cnt, so the body runs 25 times, which is the value given to .trip.
;
; Two independent pointer/accumulator chains are kept: the symbols
; declared with .rega (a_4, sum0, ...) and the symbols declared with
; .regb (b_4, sum1, ...).  The two partial sums are only combined
; after the loop.
;
; NOTE(review): each LDW fetches one 32-bit word and MPY / MPYH multiply
; its low / high 16-bit halves, so the element labels in the comments
; (x[0] in the low half, x[1] in the high half) presumably assume
; 16-bit elements, word-aligned arrays and little-endian layout --
; confirm against the caller.
;-----------------------------------------------------------------------
_dotp:  .cproc  a_0, b_0

        .rega   a_4, tmp0, sum0, prod1, prod2
        .regb   b_4, tmp1, sum1, prod3, prod4
        .reg    cnt, sum
        .reg    val0, val1

        ; Second pointer pair starts 4 bytes (one word) past the first,
        ; so the two pairs read alternating words of each array.
        ADD     4, a_0, a_4             ; a_4 = a_0 + 4 bytes
        ADD     4, b_0, b_4             ; b_4 = b_0 + 4 bytes

        MVK     100, cnt                ; cnt = elements still to process
        ZERO    sum0                    ; partial sum for words 0, 2, 4, ...
        ZERO    sum1                    ; partial sum for words 1, 3, 5, ...

loop:   .trip   25                      ; 100 elements / 4 per pass
        ; --- first word of this pass: elements [0] and [1] ------------
        LDW     *a_0++[2], val0         ; load a[0-1], then skip 2 words
        LDW     *b_0++[2], val1         ; load b[0-1], then skip 2 words
        MPY     val0, val1, prod1       ; a[0] * b[0]
        MPYH    val0, val1, prod2       ; a[1] * b[1]
        ADD     prod1, prod2, tmp0      ; sum0 += (a[0]*b[0]) +
        ADD     tmp0, sum0, sum0        ;         (a[1]*b[1])

        ; --- second word of this pass: elements [2] and [3] -----------
        LDW     *a_4++[2], val0         ; load a[2-3], then skip 2 words
        LDW     *b_4++[2], val1         ; load b[2-3], then skip 2 words
        MPY     val0, val1, prod3       ; a[2] * b[2]
        MPYH    val0, val1, prod4       ; a[3] * b[3]
        ADD     prod3, prod4, tmp1      ; sum1 += (a[2]*b[2]) +
        ADD     tmp1, sum1, sum1        ;         (a[3]*b[3])

        ; --- loop control: both instructions are predicated on cnt ----
  [cnt] SUB     cnt, 4, cnt             ; cnt -= 4
  [cnt] B       loop                    ; if (cnt != 0) goto loop

        ADD     sum0, sum1, sum         ; compute final result
        .return sum
        .endproc