A.
+----+----+----+----+-----+-----+
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
+----+----+----+----+-----+-----++----+
+----|----|----|----------|--->| |
| +----|----|----------|--->|load| vmovad 0(%rbp,%rcx,8),%xmm1
| | | | +-----|----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | | |load|---+
| | +----|----------|--->| | |
| | | | | | +----+ | vmulsd (%rax,%rcx,8),%xmm1,%xmm0
| | | | | | | |<--+
| | | | +-----|--->|mul |
| | | | +-----|----| |
| | | | | | +----+
| | | | +-----|--->| |
| | | | | +--->|add | vaddsd %xmm1,%xmm0,%xmm0
| | | | | +----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | |add | addq $1, %rcx
| +----|----|----------|----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | | |cmp |---+ cmpq %rbx, %rcx
| | | +----|-----|--->| | |
| | | | | | +----+ |
| | | | | | | | |
| | | | | | |jne |<--+ jne .L15
| | | | | | | |
| | | | | | +----+
v v v v v v
+----+----+----+----+-----+-----+
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
+----+----+----+----+-----+-----+
+----+ +-----+
|%rcx| |%xmm0|
+----+ +-----+
| |
| +----+ | <--------- key path
+---->|load|------+ |
| +----+ | |
| v v
| +----+ +-+--+ +-+--+
+---->|load|--->|mul |--->|add |
| +----+ +----+ +----+
| |
| |
v |
+----+ |
|add | |
+----+ |
| |
v v
+-+--+ +-----+
|%rcx| |%xmm0|
+----+ +-----+
B.
5-12
float add cell, CPE is 3.0
C.
5-12
long add cell, 1.0
D.
only float add on key path
/*
* 5.13.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "./lib/vec.h"
/* inner product. accumulate in temporary */
void inner4(vec_ptr u, vec_ptr v, data_t *dest) {
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum = (data_t) 0;
for (i = 0; i < length; i++) {
sum = sum + udata[i] * vdata[i];
}
*dest = sum;
}
int main(int argc, char* argv[]) {
vec_ptr u = new_vec(4);
vec_ptr v = new_vec(4);
data_t *arr = (data_t*) malloc(sizeof(data_t) * 4);
arr[0] = 0;
arr[1] = 1;
arr[2] = 2;
arr[3] = 3;
set_vec_start(u, arr);
set_vec_start(v, arr);
data_t res;
inner4(u, v, &res);
assert(res == 1+4+9);
return 0;
}