5.14

 +----+                          +----+
 |sum |                          | i  |
 +----+                          +----+
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   v                               |
 +----+       +----+  +----+       |
 |add |<------|mul |<-|load|<------+
 +----+       +----+  +----+       |
   |                               |
   |                               v
   |                             +----+
   |                             |add |
   |  <------- key path          +----+
   |                               |
   |                               |
   v                               v
 +----+                          +-+--+
 |sum |                          | i  |
 +----+                          +----+

A.

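Each iteration of the unrolled loop performs 6 long/float additions, one after another on the key path.

The number of iterations is n/6.

So the key path contains n/6 * 6 = n additions in total.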

CPE bound == 1.0

B.

Same as A.

/*
 * 5.14.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "./lib/vec.h"

#define LEN 24

/*
 * Inner product of u and v using 6x1 loop unrolling: six products are
 * folded into a single local accumulator per iteration, and the result
 * is stored through dest once at the end.
 *
 * u, v - vectors to multiply element-wise. The length is taken from u
 *        only, so v is assumed to hold at least as many elements
 *        (not checked here).
 * dest - receives the accumulated sum.
 */
void inner4(vec_ptr u, vec_ptr v, data_t *dest) {
  long i;
  long length = vec_length(u);
  data_t *udata = get_vec_start(u);
  data_t *vdata = get_vec_start(v);
  data_t sum = (data_t) 0;

  /*
   * Unrolled part: runs while a full group of six elements remains,
   * i.e. while i+5 < length. (A bound of length-6 would push the last
   * full group into the element-by-element loop below.) The additions
   * are still evaluated left to right, so the result is the same as a
   * plain sequential accumulation.
   */
  for (i = 0; i < length-5; i+=6) {
    sum = sum + udata[i] * vdata[i] +
      udata[i+1] * vdata[i+1] +
      udata[i+2] * vdata[i+2] +
      udata[i+3] * vdata[i+3] +
      udata[i+4] * vdata[i+4] +
      udata[i+5] * vdata[i+5];
  }
  /* Finish the remaining (fewer than six) elements one at a time. */
  for(; i < length; i++) {
    sum = sum + udata[i] * vdata[i];
  }
  *dest = sum;
}

/*
 * Smoke test for inner4: both vectors are pointed at the same backing
 * array {0, 1, 2, 3, 0, ...}, so the inner product is the sum of squares
 * 0 + 1 + 4 + 9.
 *
 * Returns 0 on success, EXIT_FAILURE if the backing array cannot be
 * allocated; a wrong result aborts through assert.
 */
int main(int argc, char* argv[]) {
  vec_ptr u = new_vec(LEN);
  vec_ptr v = new_vec(LEN);

  /* calloc zero-fills the array, replacing the malloc + memset pair. */
  data_t *arr = calloc(LEN, sizeof *arr);
  if (arr == NULL) {
    perror("calloc");
    return EXIT_FAILURE;
  }
  arr[1] = 1;
  arr[2] = 2;
  arr[3] = 3;

  /*
   * u and v intentionally share one data array.
   * NOTE(review): whether the vectors take ownership of arr is not
   * visible from this file, so arr is not freed here.
   */
  set_vec_start(u, arr);
  set_vec_start(v, arr);

  data_t res;
  inner4(u, v, &res);

  assert(res == 1+4+9);
  return 0;
}


comments powered by Disqus