// Accelerated matrix-multiplication (MM) code
//
// File: mmf.cpp
#include <iostream>
#include <cstdlib>
#include <ff/farm.hpp>
 
using namespace std; 
using namespace ff; 
// Accumulates the product a*b into c (c += a*b); all three are n x n matrices
// accessed as c[i][j], i.e. arrays of row pointers. Defined after main().
void mm(float **a, float **b, float **c, int n); 
// Writes the n x n matrix a to standard output, one row per line, followed by
// an empty line. Defined after main().
void printm(float **a, int n); 
 
/**
 ** additional code needed to represent and process tasks
 **
 **/
// One unit of work for the accelerator: the operands and the destination of a
// single matrix product, passed around as an opaque pointer.
// NOTE(review): the tag `__task` contains a double underscore, a spelling the
// C++ standard reserves for the implementation; it is kept unchanged here so
// that any existing reference to `struct __task` keeps compiling.
struct __task {
  float ** a;   // left operand, handed to mm() as `a`
  float ** b;   // right operand, handed to mm() as `b`
  float ** c;   // accumulator/result, handed to mm() as `c`
  int n;        // matrix dimension, handed to mm() as `n`
};
typedef struct __task TASK;
 
 
class MMer: public ff_node {
  void * svc (void * t) {
    TASK * task = (TASK *) t; 
    mm(task->a, task->b, task->c, task->n);
    return(t);
  }
};
/**
 ** end of the additional code
 **/ 
 
int main(int argc, char * argv[]) {
  if(argc==1) {
    cout << "Usage is:\nmm n seed m" << endl;
    return(0); 
  }
  int n = atoi(argv[1]); // dimensions of the matrixes
  int s = atoi(argv[2]); // seed for random number generator
  int m = atoi(argv[3]); // number of matrixes to use
 
 
  /*
   * this is the code needed to set up the accelerator
   *
   */
  ff_farm<> swacc(true); // true means use it as an accelerator
  int cores =  sysconf( _SC_NPROCESSORS_ONLN ) - 1;
  cout << "Using " << cores << " cores for the accelerator" << endl; 
  vector<ff_node*> w; 
  for(int i=0; i<cores-1; i++) {
    w.push_back(new MMer()); 
  }
  swacc.add_workers(w);
  swacc.add_collector(NULL); // default collector
  swacc.run();
  /*
   * end of the accelerator code
   */
 
  srand(s); 
 
  for(int t=0; t<m; t++) {
    // create matrix A
    float **A = new float* [n]; 
    for(int i=0; i<n; i++) 
      A[i] = new float[n]; 
    // create matrix B
    float **B = new float* [n]; 
    for(int i=0; i<n; i++) 
      B[i] = new float[n]; 
    // create matrix C
    float **C = new float* [n]; 
    for(int i=0; i<n; i++) 
      C[i] = new float[n]; 
 
    // init matrixes
    for(int i=0; i<n; i++) 
      for(int j=0; j<n; j++) {
	A[i][j] = ((float) rand()) / ((float) RAND_MAX); 
	B[i][j] = ((float) rand()) / ((float) RAND_MAX); 
	C[i][j] = 0.0;
      }
    // offload
    // was mm(A,B,C,n); 
    TASK * task = new TASK(); 
    task->a = A; task->b = B; task->c = C; task->n = n;
    swacc.offload((void *)task); 
 
    // next matrix 
    // printm(A,n); 
    // printm(B,n);
    // printm(C,n);
  }
  /** 
   ** now we need to terminate accelerator
   **/
  swacc.offload((void *) FF_EOS);
  swacc.wait();
}
 
/**
 ** Matrix multiply-accumulate on n x n matrices stored as arrays of row
 ** pointers: every c[row][col] is increased by the dot product of row `row`
 ** of a and column `col` of b. c is not cleared first, so the caller decides
 ** its starting contents. Does nothing when n <= 0.
 **/
void mm(float **a, float **b, float **c, int n) {
  for(int row=0; row<n; ++row) {
    for(int col=0; col<n; ++col) {
      // terms are added in increasing k order, directly into c
      int k = 0;
      while(k < n) {
        c[row][col] = c[row][col] + a[row][k] * b[k][col];
        ++k;
      }
    }
  }
}
 
/**
 ** Dumps the n x n matrix a on standard output: one line per row, every
 ** element followed by a single blank, and an empty line after the last row.
 **/
void printm(float **a, int n) {
  for(int r=0; r<n; ++r) {
    const float *row = a[r];
    for(int col=0; col<n; ++col) {
      std::cout << row[col] << " ";
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}
 
/*  lenovo      seq      omp       ff
  128 123 32    0.59     0.37
*/