3.59

assume

multiple

2^128 overflows, so we don’t care about it

# void store_prod(int128_t* dest, int64_t x, int64_t y)
# dest in %rdi, x in %rsi, y in %rdx
store_prod:
  movq %rdx, %rax     # %rax = y
  cqto                # (int128_t)y, %rdx = (-1)y_63
  movq %rsi, %rcx     # %rcx = x

  # x >> 63, if x == 1, %rcx = -1; if x_63 == 0, %rcx = 0
  # %rcx = (-1)x_63
  sarq $63, %rcx

  # pay attention, imulq behaves differently according to param number(1/2)
  imulq %rax, %rcx    # %rcx = y * -x_63
  imulq %rsi, %rdx    # %rdx = x * -y_63
  addq %rdx, %rcx     # %rcx = x * -y_63 + y * -x_63
  mulq %rsi           # %rdx:%rax <= ux * uy

  # lower 64 bits are same for x * y and ux * uy. ref (2.18) on book
  # %rdx = ux * uy(high 64 bits) - (x_{63}y + y_{63}x)2^{64}
  addq %rcx, %rdx

  movq %rax, (%rdi)   # set lower 64bits
  movq %rdx, 8(%rdi)  # set higher 64bits
  ret
comments powered by Disqus