I tried to implement unsigned 32 bit multiplication to be used on CH32V003 chip which doesn't have a hardware multiplier. I translated a code used for AVR to RISCV assembly (shift and add method) which outputs a 64bit answer. Most of the numbers i tried were multiplied correctly including max 0xffffffff times 0xffffffff. When i try 0xffffffff times 0x19 i get 0x12ffffffe7 but the correct answer should be 0x18ffffffe7. I tried multiple codes of different permutations and combinations provided by CHATGPT,GEMINI,DEEPSEEK etc but the faulty answer persists. All the AI chatbots gave up. I am a hobbyist and my knowledge in this subject is limited. I am simulating the code in RARS. The only code I was able to get correct answer was repeated addition of the multiplicand multiplier times with carry/overflow propagation. I doubt the shift and add method fails because of no proper way to detect overflow like carry bit. If any members can find a solution i'd be grateful.
my code translated from AVR`
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicant
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
start:
call ROR # rotate right multiplier to test lsb is 0 or 1
bnez x3,multiply # if lsb =1 branch to repeated adding of multiplicant to result register
finishmul:
call RLL2 # shift multiplicand left or multiply by 2
beqz a2,exit_proc
J start # repeat loop
exit_proc:
j exit_proc
#ret
multiply:
add a5,a3,a1 # add multiplicant to low result register and store final result in a5 for processing
sltu a0,a5,a3 # set a0 to 1 if result of addition a3:a1 i a5 is greater than a3
sltu x3,a5,a1 # set x3 to 1 if result of addition a3:a1 in a5 is greater than a1
or a0,a0,x3 # or a0 and x3 , if 1 carry if a0 = 0 no carry
bnez a0,carryset # if a0 = 1 carry set, branch to label carry set
mv a3,a5 # result in working register copied to a3 low result register
J finishmul # jump to label finishmul
carryset: # reach here only if carryset
mv a3,a5 # copy a5 to low result a3
addi a4,a4,1 # add carry to a4 high register result
J finishmul # jump to label finishmul
ROR:
li x3,0 # clear carry
mv t0,a2 # copy number in a2 to t0
andi t0,t0,1 # extract lsb is 0 or 1
beqz t0,zzz # if lab is 0 branch to zzz
li x3,1 # if lsb is 1 carry occured , load 1 in carry register x3
srli a2,a2,1 # shift right a2 by 1 postion
ret # return to caller
zzz: # reach here if lsb =0
li x3,0 # load x3 0 indicating carry bit is 0
srli a2,a2,1 # right shift multiplier once. divide multiplier by 2
ret # return to caller
ROL:
li x3,0 #
mv t0,a2
li x3,0x80000000
and t0,t0,x3
beqz t0,zzz1
li x3,1 # carry
slli a2,a2,1
ret
zzz1:
li x3,0
slli a2,a2,1
ret
RLL2: # rotate left 2 registers a3:a5
mv a5,a4 # copy contents of a4 to a5
li x3,0 # clear x3
mv t0,a1 # copy multiplicant to t0
li x3 ,0x80000000 # load x3 MSB bitmask
and t0,t0,x3 # and with 0x800000000 to extract the MSB
bnez t0,OR1 # if MSB = 1 branch to OR1 label
slli a1,a1,1 # shift left 1 position a1 register ( multiplicant)
slli a5,a5,1 # shift left 1 position working register with value of a4 register ( multiplicant)
beqz a2,exit # if multiplier register is 0 exit
mv a4,a5 # copy back the shifter multiplicant to a4
ret
OR1:
mv a5,a4
slli a1,a1,1
slli a5,a5,1
li x3,1
or a5,a5,x3
beqz a2,exit
mv a4,a5
ret
exit:
ret
i tried above code and all the answer for 0xffffffff multiplied by 0x19 is 0x12ffffffe7 in RARS. It should be 0x18ffffffe7 as per calculator. If i do repeated addition of 0xffffffff 0x19 times i get the correct answer 0x18ffffffe7.
I tried to implement unsigned 32 bit multiplication to be used on CH32V003 chip which doesn't have a hardware multiplier. I translated a code used for AVR to RISCV assembly (shift and add method) which outputs a 64bit answer. Most of the numbers i tried were multiplied correctly including max 0xffffffff times 0xffffffff. When i try 0xffffffff times 0x19 i get 0x12ffffffe7 but the correct answer should be 0x18ffffffe7. I tried multiple codes of different permutations and combinations provided by CHATGPT,GEMINI,DEEPSEEK etc but the faulty answer persists. All the AI chatbots gave up. I am a hobbyist and my knowledge in this subject is limited. I am simulating the code in RARS. The only code I was able to get correct answer was repeated addition of the multiplicand multiplier times with carry/overflow propagation. I doubt the shift and add method fails because of no proper way to detect overflow like carry bit. If any members can find a solution i'd be grateful.
my code translated from AVR`
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicant
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
start:
call ROR # rotate right multiplier to test lsb is 0 or 1
bnez x3,multiply # if lsb =1 branch to repeated adding of multiplicant to result register
finishmul:
call RLL2 # shift multiplicand left or multiply by 2
beqz a2,exit_proc
J start # repeat loop
exit_proc:
j exit_proc
#ret
multiply:
add a5,a3,a1 # add multiplicant to low result register and store final result in a5 for processing
sltu a0,a5,a3 # set a0 to 1 if result of addition a3:a1 i a5 is greater than a3
sltu x3,a5,a1 # set x3 to 1 if result of addition a3:a1 in a5 is greater than a1
or a0,a0,x3 # or a0 and x3 , if 1 carry if a0 = 0 no carry
bnez a0,carryset # if a0 = 1 carry set, branch to label carry set
mv a3,a5 # result in working register copied to a3 low result register
J finishmul # jump to label finishmul
carryset: # reach here only if carryset
mv a3,a5 # copy a5 to low result a3
addi a4,a4,1 # add carry to a4 high register result
J finishmul # jump to label finishmul
ROR:
li x3,0 # clear carry
mv t0,a2 # copy number in a2 to t0
andi t0,t0,1 # extract lsb is 0 or 1
beqz t0,zzz # if lab is 0 branch to zzz
li x3,1 # if lsb is 1 carry occured , load 1 in carry register x3
srli a2,a2,1 # shift right a2 by 1 postion
ret # return to caller
zzz: # reach here if lsb =0
li x3,0 # load x3 0 indicating carry bit is 0
srli a2,a2,1 # right shift multiplier once. divide multiplier by 2
ret # return to caller
ROL:
li x3,0 #
mv t0,a2
li x3,0x80000000
and t0,t0,x3
beqz t0,zzz1
li x3,1 # carry
slli a2,a2,1
ret
zzz1:
li x3,0
slli a2,a2,1
ret
RLL2: # rotate left 2 registers a3:a5
mv a5,a4 # copy contents of a4 to a5
li x3,0 # clear x3
mv t0,a1 # copy multiplicant to t0
li x3 ,0x80000000 # load x3 MSB bitmask
and t0,t0,x3 # and with 0x800000000 to extract the MSB
bnez t0,OR1 # if MSB = 1 branch to OR1 label
slli a1,a1,1 # shift left 1 position a1 register ( multiplicant)
slli a5,a5,1 # shift left 1 position working register with value of a4 register ( multiplicant)
beqz a2,exit # if multiplier register is 0 exit
mv a4,a5 # copy back the shifter multiplicant to a4
ret
OR1:
mv a5,a4
slli a1,a1,1
slli a5,a5,1
li x3,1
or a5,a5,x3
beqz a2,exit
mv a4,a5
ret
exit:
ret
i tried above code and all the answer for 0xffffffff multiplied by 0x19 is 0x12ffffffe7 in RARS. It should be 0x18ffffffe7 as per calculator. If i do repeated addition of 0xffffffff 0x19 times i get the correct answer 0x18ffffffe7.
Share Improve this question asked Mar 12 at 10:12 sajeev sankaransajeev sankaran 13 bronze badges 7- 1 Some comments about this: 1) don't use xN registers, stick to named registers (you don't know which register it is aliasing), 2) RISC-V is a 3-register architecture; your code stayed 2-register, and deserves some cleaning up, 3) in a similar manner, there are many unnecessary jumps, e.g. in ROR you got your result in t0 and then for some reason you use it to branch off and manually set same value in x3; 4) "multiply" is actually "add", "ROL" is not used, and "RLL2" doesn't do what it says on the label or in the comment (why does it touch a1? why does it look at a2?) – Andrey Turkin Commented Mar 12 at 10:19
- Sir, I deleted the ROL part as you pointed out. The 2 register is because i am using pseudo instructions, if that was what you were pointing out. "multiply" is just a label i gave to spot easily. registers a1:a5 (result_hi) & (multiplicand) are shifted left if MSB of multiplicand is 0 . If multiplicand MSB is 1 registers a1:a5 (result_hi) & (multiplicand) are shifted left and 1 is inserted as LSB of a5 (as if bit1 in carry would have been shifted). I am not sure that i am clear here. Thanks – sajeev sankaran Commented Mar 12 at 11:07
- a2 is being checked for 0 to exit and not perform unnecessary extra additions – sajeev sankaran Commented Mar 12 at 11:12
- I actually dont know how shift and add method works. it would be nice If i can get advice on how to implement it on RISCV with assembly code and not using MUL op code. Would also like to test (0xffffffff) * (0x190) will yeild correct answer? – sajeev sankaran Commented Mar 12 at 11:32
- In shift-and-add, you'd usually shift one of the multiplicands right, and shift the other left (and keep adding it to the result when necessary). And this is mostly happens in your code; however you need double-register width for the multiplicand being shifted left, and double-register for the result. And here in your code RLL2 seems to be doing double-register shift left for a4:a1 pair (so it would seem to be the multiplicand pair), but a4 is ALSO top part of the result. Pretty sure that can't work. – Andrey Turkin Commented Mar 12 at 13:18
1 Answer
Reset to default 0I managed to write a new routine that implements 32x32=64 bit multiplication using the shift and add with double register method. If multiplier lsb is 1 add multiplicand to high 32bit register then shift right high result & low result register 1 bit. If multiplier lsb is 0 shift the result high & low result register right 1 bit. Also shift right 1 bit the multiplier. This above process repeated 32 times. When multiplicand is added to the high result if carry occurs 1 is ORed to the MSB of the carry register. During shifting carry register, result_hi register, result_lo is shifted as a block towards right by 1 bit each time. This is the final code and works for the previous value 0xffffffff X 0x19 = 0x18ffffffe7. Thanks to everybody for the support
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicand
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
li x5,32 # number of bits to be tested/counter
loop:
mv x3,a2 # copy multiplier to test lsb 1 or 0
andi x3,x3,1 # extract lsb in x3
bnez x3,addnshift1 # if x3 is 1 branch to add and shift
call shift # if x3 is 0 call routine to shift result hi and lo + carry register right
addi x5,x5,-1 # decrease counter
bnez x5,loop # if counter is not 0 go to label loop
slli t6,t6,1 # if counter is 0, shift carry register left 1 time ( i dont know why but corrects answer)
j exit # exit multiplication procedure
addnshift1:
call addnshift # call addnshift routine to add multiplicand to result_hi and shift both result_hi & result_lo
addi x5,x5,-1 # decrease counter
bnez x5,loop # if counter is more than 0 branch to label loop
slli t6,t6,1 # if counter is 0, shift carry register left 1 time ( i dont know why but corrects answer)
j exit # exit multiplication procedure
shift:
srli a2,a2,1 # multiplier right shift, 1 lsb lost
srli a3,a3,1 # 2n low register(a3) right shift and 0 in msb (a4:a3)
mv x4,a4 # a copy of high 2n register(a4) to x4 (a4:a3)
andi x4,x4,1 # copy lsb of a4 high 2n register
beqz x4,lsb0 # if lsb extracted is 0 , branch to lsb0 label
li x4,0x80000000 # if lsb of a4 was 1
or a3,a3,x4 # lsb of a4 now in msb of a3. (a4:a3 >> 1)
lsb0:
srli a4,a4,1 # 2n high register right shift ,same as 0 shifted between a4 to a3 >>
srli t6,t6,1 # shift right carry register together with a4:a3
ret # return to main program
addnshift:
add a4,a4,a1 # add multiplicand to high 2n register
sltu x8 a4,a1 # set x8 to 1 if result of addition (a4 + a1) answer_hi and multiplicand
bnez x8,setcarry # if x8 is not 0 , branch to setcarry label
return:
srli a2,a2,1 # multiplier right shift
srli a3,a3,1 # 2n low register right shift and 0 in msb
mv x4,a4 # a copy of lw 2n
andi x4,x4,1 # copy lsb of a4 high 2n register
beqz x4,addlsb0 # if lsb extracted is 0 , branch to addlsb0 label
li x4,0x80000000 # if lsb of a4 was 1
or a3,a3,x4 # lsb of a4 now in msb of a3. (a4:a3 >> 1)
addlsb0:
srli a4,a4,1 # 2n high register right shift
srli t6,t6,1 # shift right carry register together with a4:a3
ret # return to main program
setcarry:
li x7,0x80000000 # set msb of x7 with 0x80000000
or t6,t6,x7 # set msb of x7 by oring t6 with x7
j return # jump to shifting routine
exit:
beqz t6,nocarry # if t6 is not set , 0 , no overflow occurred, branch to nocarry
mv a4,t6 # if carry set , copy t6 to answer hi register
nocarry:
la a0,result_hi #
sw a4,0(a0) # save to data section
la a0,result_lo
sw a3,0(a0) # save to data section
end:
j end
–